From 7f4b2651792603cdcaa581faed00d26b205637f9 Mon Sep 17 00:00:00 2001
From: Jan Benda
Date: Wed, 6 Feb 2019 18:33:53 +0100
Subject: [PATCH] [pulsetracker] simplified Dexters additions

---
 thunderfish/DextersThunderfishAddition/README    |   21 -
 .../DextersThunderfishAddition/__init__.py       |    0
 .../DextersThunderfishAddition/allfiles.txt      |  155 --
 .../analyseDexRefactor.py                        | 2276 -----------------
 .../analyseDexRefactorShort.py                   | 1995 ---------------
 .../analyseDexThinned.py                         | 2262 ----------------
 .../DextersThunderfishAddition/analyzeEods.py    | 1104 --------
 .../analyzeEods_lowpass.py                       | 1130 --------
 .../let_them_rum_allfiles_DexThunder.sh          |    6 -
 .../DextersThunderfishAddition/run_example.sh    |    5 -
 ...ThunderfishAddition.py => pulsetracker.py}    |  199 +-
 11 files changed, 83 insertions(+), 9070 deletions(-)
 delete mode 100644 thunderfish/DextersThunderfishAddition/README
 delete mode 100644 thunderfish/DextersThunderfishAddition/__init__.py
 delete mode 100644 thunderfish/DextersThunderfishAddition/allfiles.txt
 delete mode 100644 thunderfish/DextersThunderfishAddition/analyseDexRefactor.py
 delete mode 100644 thunderfish/DextersThunderfishAddition/analyseDexRefactorShort.py
 delete mode 100644 thunderfish/DextersThunderfishAddition/analyseDexThinned.py
 delete mode 100644 thunderfish/DextersThunderfishAddition/analyzeEods.py
 delete mode 100644 thunderfish/DextersThunderfishAddition/analyzeEods_lowpass.py
 delete mode 100644 thunderfish/DextersThunderfishAddition/let_them_rum_allfiles_DexThunder.sh
 delete mode 100644 thunderfish/DextersThunderfishAddition/run_example.sh
 rename thunderfish/{DextersThunderfishAddition/DextersThunderfishAddition.py => pulsetracker.py} (89%)

diff --git a/thunderfish/DextersThunderfishAddition/README b/thunderfish/DextersThunderfishAddition/README
deleted file mode 100644
index 6daee32c..00000000
--- a/thunderfish/DextersThunderfishAddition/README
+++ /dev/null
@@ -1,21 +0,0 @@
-leticia_filedata.txt -- long files with filename, date and day/night and time
-   missing some files
-allfiles.txt -- all long files, but without details
-
-
-
-### analyse_pulse_data with the .WAV file as input produces:
-eods5_70914L01_F15_1742.npy -- numpy array with results from analyze_pulse_data
-
-### this eods file can be fed to analyzeEods_lowpass.py to analyze the frequencies and amplitudes and produce the plots over the length of the full recordings...
-
-
-70914L01_F15_1742_freqs2_lp.npy -- computed frequencies, averaged over seconds/half seconds
-70914L01_F15_1742_amps2_lp.npy -- computed amplitudes, averaged over seconds/...
--- Numpy 2d-arrays, first axis: different fish classes, second axis: timesteps (seconds/half_seconds)
-
-70914L01_F15_1742_AmpFreq7_lp.pdf -- pdf with plotted EOD-amplitudes and frequencies, no classes plotted, highest number is latest version, lp = lowpass smoothing
-
-foraging_ ...
npz file with status far, near, on, each np.nan for not this status and 1 if it has the status at a given timestep, same time resolution as freqs / amps - files - -ontimes, other files - outdated - diff --git a/thunderfish/DextersThunderfishAddition/__init__.py b/thunderfish/DextersThunderfishAddition/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/thunderfish/DextersThunderfishAddition/allfiles.txt b/thunderfish/DextersThunderfishAddition/allfiles.txt deleted file mode 100644 index 81646503..00000000 --- a/thunderfish/DextersThunderfishAddition/allfiles.txt +++ /dev/null @@ -1,155 +0,0 @@ -60615L01F9.WAV -60116L01G9.WAV -60115L01F10.WAV -60615L09G7.WAV -70730L06_G10_1737.WAV -70729L01_G10_1743.WAV -70729L01_G10_557.WAV -70524L01_G10.WAV -71112L01_G10_608.WAV -60926L01F13.WAV -70917L01_G10_1738.WAV -71113L01_G10_1757.WAV -70916L01_G10_630.WAV -70730L06_F15_1735.WAV -71110L01_F12_1650.WAV -70730L01_G10_609.WAV -80118L01_F12_1703.WAV -40113L01F10.WAV -70728L01_F15_1034.WAV -70116L01E14.WAV -60722L02G14.WAV -70320L01_B6.WAV -61107L01F14.WAV -70730L01_F15_605.WAV -60722L02D6.WAV -60722L01D6.WAV -60926L01B3.WAV -70114L02D14.WAV -70522L07_D10.WAV -70917L01_D7_740.WAV -61106L01F14Noche.WAV -60721L01G14.WAV -51104L01Embarcadero.WAV -71113L01_G10_636.WAV -80121L01_F13_1731.WAV -70915L01_G10_735.WAV -80120L01_F13_640.WAV -80121L01_F13_608.WAV -71112L01_F12_1817.WAV -60725L23G12.WAV -71111L01_G10_1810.WAV -70916L02_Electrophorus embarcadero_1905.WAV -70729L01_F15_1740.WAV -51106L01Electrophorus.WAV -31105L01F14.WAV -60612L02rondoniF3.WAV -70915L01_F15_1805.WAV -70917L02_Electrophorus embarcadero_1849.WAV -70917L01_G10_724.WAV -61103L01F14.WAV -71111L10_F12_1807.WAV -60613L02rondoniF9.WAV -40320L01_G11.WAV -60925L01F12.WAV -60925L01F11.WAV -70728L01_G10_1048.WAV -61108L01Electrophorus.WAV -60924L01F12.WAV -70915L01_Electrophorus embarcadero_1242.WAV -60722L01G14.WAV -60926L01F12.WAV -80121L01_F11_606.WAV -70730L01_Solo Electrophorus embarcadero_735.WAV -71113L01_F12_559.WAV -70114L01D14.WAV -70524L02_D10.WAV -70522L01_G12.WAV -60614L22rondoniG9.WAV -70915L01_G10_1802.WAV -70320L01_B6.WAV -40112L01G9.WAV -70730L02_Solo Electrophorus embarcadero.WAV -60923L01.WAV -70914L01_F15_1742.WAV -70115L01D14.WAV -70727L01_E14_1641.WAV -30927L04E12.WAV -61106L01Electrophorus.WAV -70917L01_D7_1739.WAV -60723L02G14.WAV -30924L01C2.WAV -40116L01G9.WAV -61107L01Electrophorus.WAV -60723L01G14.WAV -71112L01_G10_1818.WAV -70915L02_Electrophorus embarcadero_1900.WAV -60725L01G14.WAV -70521L01_D4.WAV -70729L01_Solo Electrophorus embarcadero_733.WAV -70728L01_F15_1721.WAV -70521L01_F12.WAV -30104L01_Solo Electrophorus embarcadero_1732.WAV -60613L01rondoniF11.WAV -61106L01F14.WAV -70914L01_E13_1735.WAV -40114L01F10.WAV -60724L01B1.WAV -80119L01_Downstream fuera grilla_1425.WAV -71113L01_F12_1755.WAV -70522L48_G12.WAV -80119L01_G12_1747.WAV -70319L01_F1.WAV -70112L01E15.WAV -80121L01_F11_1730.WAV -80120L01_G10_1731.WAV -80120L01_G10_626.WAV -30925L06F11.WAV -61107L01F14Noche.WAV -70524L01_D10.WAV -70916L02_F15_1804.WAV -60925L06F12.WAV -60113L01F10.WAV -70915L01_F13_738.WAV -60721L01G13.WAV -51107L01Electrophorus.WAV -70523L01_G11.WAV -50926L08F13.WAV -70917L01_Electrophorus embarcadero_811.WAV -60114L01F10.WAV -70522L01_D10.WAV -70317L01_B1.WAV -70520L01_D13.WAV -70729L01_F15_553.WAV -70916L01_F15_648.WAV -40317L01_G11.WAV -60723L01D5.WAV -70317L01_G11.WAV -71111L01_F12_603.WAV -40319L01_F1.WAV -71112L01_F12_605.WAV -70916L01_Electrophorus embarcadero_830.WAV -60926L04F12.WAV -61104L01F14.WAV 
-70520L01_F10.WAV
-60924L03C2.WAV
-70319L01_G11.WAV
-60927L08F13.WAV
-60927L01F13.WAV
-60724L02G14.WAV
-70916L01_G10_1805.WAV
-70729L02_Solo Electrophorus embarcadero_1841.WAV
-70728L01_G10_1725.WAV
-71111L01_G10_1339.WAV
-70523L01_D10.WAV
-70320L01_G11.WAV
-80120L01_F13_1733.WAV
-61106L02Electrophorus.WAV
-60724L01G14.WAV
-60723L02D5.WAV
-51105L01Embarcadero.WAV
-70113L01D14.WAV
-40115L01F10.WAV
-70727L01_G10_1643.WAV
-61105L01F14.WAV
-60724L02B1.WAV
diff --git a/thunderfish/DextersThunderfishAddition/analyseDexRefactor.py b/thunderfish/DextersThunderfishAddition/analyseDexRefactor.py
deleted file mode 100644
index f9617e7c..00000000
--- a/thunderfish/DextersThunderfishAddition/analyseDexRefactor.py
+++ /dev/null
@@ -1,2276 +0,0 @@
-# Script to detect and classify EODs in recordings of weakly electric pulse
-# fish, Dexter Früh, 2018
-#
-# It is suggested to save the recording in
-# workingdirectory/recording/recording.WAV
-# Results will be saved in workingdirectory/recording/
-#
-# input:
-#   - [Recorded Timeseries] recording.WAV
-# outputs (optional):
-#   - [Detected and Classified EODs]
-#     (Numpy array with shape (number of EODs, 4 (attributes of EODs)),
-#     with the EOD attributes
-#       - x-location of the EOD
-#         (time/x-coordinate/datapoint in recording)
-#       - y-location of the EOD
-#         (amplitude of the positive peak of the pulse-EOD)
-#       - height of the EOD (largest distance between peak and trough in the EOD)
-#       - class of the EOD
-#     eods_recording.npy
-#   - [plots of the results of each analysis step for each
-#     analysis part (time interval of length = deltat) of the recording]
-#
-# required command line arguments at function call
-#   - save : if True, save the results to a numpy file (possibly
-#     overwriting an existing one)
-#   - plot : if True, plot results in each analysis step
-#   - new  : if True, do a new analysis of the recording, even if there
-#     is an existing analyzed .npy file with the right name.
-# -# call with: -# python3 scriptname.py save plot new (starttime endtime[sec] for only -# partial analysis) -# -# other parameters are behind imports and some hardcoded at the relevant -# codestep -import sys -import numpy as np -import copy -from scipy.stats import gmean -from scipy import stats -from scipy import signal -from scipy import optimize -import matplotlib -from fish import ProgressFish -import matplotlib.pyplot as plt -from thunderfish.dataloader import open_data -from thunderfish.peakdetection import detect_peaks -from scipy.interpolate import interp1d -from scipy.signal import savgol_filter -from collections import deque -import ntpath -import nixio as nix -import time -import os -from shutil import copy2 - -from ownDataStructures import Peak, Tr, Peaklist -import DextersThunderfishAddition as dta - -from IPython import embed -# parameters for the analysis - -deltat = 30.0 # seconds of buffer size -thresh = 0.04 # minimal threshold for peakdetection -peakwidth = 20 # width of a peak and minimal distance between two EODs - -# basic parameters for thunderfish.dataloader.open_data -verbose = 0 -channel = 0 - -# timeinterval to analyze other than the whole recording -#starttime = 0 -#endtime = 0 -#timegiven = False - -def main(): # analyse_dex.py filename save plot new (optional starttime endtime [sec]) - home = os.path.expanduser('~') - os.chdir(home) - # defaults for optional arguments - timegiven = False - plot_steps = False - - # parse command line arguments - filepath, save, plot, new (, starttime, - # endtime) - filepath = sys.argv[1] - #thresh = 0.05 - save = int(sys.argv[2]) - plot_steps = int(sys.argv[3]) - new = int(sys.argv[4]) - if len(sys.argv[:])>5: - timegiven = True - starttime = int(sys.argv[5]) - endtime = int(sys.argv[6]) - #print(starttime, endtime) - # plot_steps = 1 - peaks = np.array([]) - troughs = np.array([]) - cutsize = 20 - maxwidth = 50 #10 - ultimate_threshold = thresh+0.01 - filename = path_leaf(filepath) - - proceed = input('Currently operates in home directory. If given a pulsefish recording filename.WAV, then a folder filename/ will be created in the home directory and all relevant files will be stored there. continue? [y/n]').lower() - if proceed == 'n': - quit() - elif proceed == 'y': - pass - #do something - elif proceed != 'y': - quit() - - ### ## ask user before overwriting - # if save == 1: - # proceed = input('Really want to save data and possibly overwrite existing? 
[y/n]').lower() - # if proceed == 'n': - # quit() - # elif proceed == 'y': - # printcat file | while read line - # do - #do something - # done('continuing') - # elif proceed != 'y': - # quit() - datasavepath = filename[:-4] - print(datasavepath) - eods_len = 0 - - ### ## starting analysis if it is wished or the analyzed EODs-file is not available in the working directory - if new == 1 or not os.path.exists(filename[:-4]+"/eods5_"+filename[:-3]+"npy"): - - ### ## import data - with open_data(filepath, channel, deltat, 0.0, verbose) as data: - - if save == 1 or save == 0: - # datasavepath = filename[:-4]+"/"+filename - if not os.path.exists(datasavepath): - os.makedirs(datasavepath) - copy2(filepath, datasavepath) - samplerate = data.samplerate - - ### ## split datalength into smaller blocks - nblock = int(deltat*data.samplerate) - if timegiven == True: - #print(starttime, samplerate) - parttime1 = starttime*samplerate - # parttime1 = samplerate * 10270 - parttime2 = endtime*samplerate - data = data[parttime1:parttime2] - if len(data)%nblock != 0: - blockamount = len(data)//nblock + 1 - else: - blockamount = len(data)//nblock - bigblock = [] - - ### ## output first (0%) progress bar - print('blockamount: ' , blockamount) - progress = 0 - print(progress, '%' , end = " ", flush = True) - fish = ProgressFish(total = blockamount) - olddatalen = 0 - startblock = 0 - ## iterating through the blocks, detecting peaks in each block - for idx in range(startblock, blockamount): - - ### ## print progress - if progress < (idx*100 //blockamount): - #print(progress, '%' , end = " ", flush = True) - progress = (idx*100)//blockamount - # print('.' , end = '') - progressstr = 'Partstatus: '+ str(0) + ' '*2 + ' % (' + '0' + ' '*4+ '/' + '?'+' '*4+ '), Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - progressstr = 'Partstatus: '+ 'Part ' + '0'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - - ### ## take debugging times, not used right now - time1 = time.time() - #print('took ', time1-time0, 's') - time0 = time1 - - # time measurement of parts of the algorithm to find time - # efficiency bottlenecks - bottletime = [] - bottletime.append(time.time()) #0 - datx = data[idx*nblock:(idx+1)*nblock] - ### ## smoothing of the timeseries and calculating autocorrelation - not used - #from scipy.signal import butter, lfilter - #datx = savgol_filter(datx, 11, 7) - #fs = samplerate # 1 ns -> 1 GHz - #cutoff = samplerate/10 # 10 MHz - #B, A = butter(5, cutoff / (fs / 3), btype='low') # 1st order Butterworth low-pass - #datx = lfilter(B, A, datx, axis=0) - #plt.plot(datx) - #plt.show() - #sig = data[-320000:-1] - #autocorr = signal.fftconvolve(sig, sig, mode='full') - #plt.plot(autocorr) - #plt.show() - #f, Pxx_den = signal.periodogram(sig, samplerate) - #plt.plot(Pxx_den) - #plt.show() - #x = savgol_filter(x, 11, 7) - - # ---------- analysis -------------------------------------------------------------------------- - # step1: detect peaks in timeseries - pk, tr = detect_peaks(datx, thresh) - troughs = tr - bottletime.append(time.time()) #1 - # continue with analysis only if multiple peaks are detected - if len(pk) > 2: - def makepeaklist_refactor(pk,tr,data): - ### ## create 'peaks' with x,y and height and discard peaks that seem to be no EODs based on their width and simple features like - no minimum close to the maximum. 
- # decide whether a peak or a through is detected first - pkfirst = int((min(pk[0],tr[0])= 0 and right_tr_ind < len(tr): - # ltr_x = tr[left_tr_ind] - # ltr_y = datx[ltr_x] - # rtr_x = tr[right_tr_ind] - # rtr_y = datx[rtr_x] - if min((pk_x - ltr_x),(rtr_x -pk_x)) > peakwidth: - pk_r[...] = False - elif max((pk_x - ltr_x),(rtr_x -pk_x)) <= peakwidth: - pk_h[...] = pk_y - min(ltr_y, rtr_y) - else: - if (pk_x-ltr_x)<(rtr_x-pk_x): - pk_h[...] = pk_y-ltr_y - else: - pk_h[...] = pk_y -rtr_y - elif left_tr_ind == -1: - if rtr_x-pk_x > peakwidth: - pk_r[...] = False - else: - pk_h[...] = pk_y- rtr_y - elif right_tr_ind == len(tr): - if pk_x-ltr_x > peakwidth: - pk_r[...] = False - else: - pk_h[...] = pk_y-ltr_y - peaks = np.array([peaks_x, peaks_y, peaks_h], dtype = np.float)[:,peaks_real!=0] - return peaks - peaks = dta.makeeventlist(pk,tr,datx,peakwidth) - #plt.plot(data[0:32000]) - #for ik in peaks.list[0:400]: - # plt.scatter(i.x, i.height) - #plt.show() - bottletime.append(time.time()) #2 - def discardnearbypeaks_refactor(peaks, peakwidth): - ### ## discard peaks that are close to each other, as a EOD mostly has more than one maximum and only one of the maxima is considered to be the EOD/EODlocation - unchanged = False - while unchanged == False: - x_diffs = np.diff(peaks[0]) - peaks_heights = peaks[2] - peaks_delete = np.zeros(len(peaks[0])) - for i, diff in enumerate(x_diffs): - if diff < peakwidth: - if peaks_heights[i+1] > peaks_heights[i] : - peaks_delete[i] = 1 - else: - peaks_delete[i+1] = 1 - peaks = peaks[:,peaks_delete!=1] - if np.count_nonzero(peaks_delete)==0: - unchanged = True - return peaks - peakindices, peakx, peakh = dta.discardnearbyevents(peaks[0],peaks[1],peakwidth) - peaks = peaks[:,peakindices] -# plt.plot(datx) -# plt.scatter(peaks[0],peaks[1]) -# plt.show() -# ### ## tries to calculate the noiselevel in the current recording part. Might actually not do anything at all, because the ultimate_threshold might be larger eitherway. some recordings have some exploitable data below this threshold, but most don't. And the rate of errors just gets too big for such small peaks. 
-# if len(peaks.list) > 2: -# tsh_n = calc_tsh_noise(peaks.list, datx) - bottletime.append(time.time()) #5 - # if len(peaks.list) > 2: - # noisediscard(peaks, ultimate_threshold, ultimate_threshold) - bottletime.append(time.time()) #6 - progressstr = 'Partstatus: '+ 'Part ' + '1'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - if len(peaks) > 0: - bottletime.append(time.time()) #7 - ### ## connects the current part with the one that came before, to allow for a continuous analysis - print('peaklist.len: ',peaklist.len) - if idx >= startblock+1: - peaklist = connect_blocks(peaklist) - else: - peaklist = Peaklist([]) - bottletime.append(time.time()) #8 - #print('\n ') - #print('cut_snips, with ' ,len(peaks.list), 'peaks') - # cuts snippets from the data time series around the peaks, interpolates them and aligns them - def cut_snippets_refactor(data, peaks, rnge): - snippets = [] - positions = np.array(peaks[0],dtype=np.int) - heights = peaks[2] - intfact = 10 - alignrange = 1.5 - alignwidth = int(np.ceil(alignrange * intfact) ) - for pos in positions: - snippets.append(data[(pos+rnge[0]):(pos+rnge[1])]) - scaled_snips = np.empty_like(snippets) - for i, snip in enumerate(snippets): - top = -rnge[0] - #plt.plot(snip) - scaled_snips[i] = snip * 1/heights[i] - #plt.plot(scaledsnips[i]) - #plt.show() - aligned_snips = np.empty((len(snippets), (rnge[1]-rnge[0])* - intfact-(2*alignwidth)-intfact)) - ipoled_snips = np.empty((len(snippets), (rnge[1]-rnge[0])*intfact-intfact)) - - for i, snip in enumerate(scaled_snips): - if len(snip) < ((rnge[1]-rnge[0])): - if i == 0: - snip = np.concatenate([np.zeros([((rnge[1]-rnge[0]) - len(snip))]),np.array(snip)]) - if i == len(scaledsnips): - snip = np.concatenate([snip, np.zeros([((rnge[1]-rnge[0])-len(snip))])]) - else: - snip = np.zeros([(rnge[1]-rnge[0])]) - interpolation = interpol(snip, 'cubic') #if len(snip) > 0 else np.zeros([(rnge[1]-rnge[0]-1)*intfact ]) - interpoled_snip = interpolation(np.arange(0, len(snip)-1, 1/intfact)) - intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) - if intsnipheight == 0: - intsnipheight = 1 - interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight - ipoled_snips[i] = interpoled_snip - - mean = np.mean(ipoled_snips, axis = 0) - meantop = np.argmax(mean) - #plt.plot(mean) - #plt.show() - #plt.plot(mean[10*-rnge[0]-10*5:-10*rnge[1]+21]) - #plt.show() - for i, interpoled_snip in enumerate(ipoled_snips): - cc = crosscorrelation(interpoled_snip[alignwidth:-alignwidth], mean) - #cc = crosscorrelation(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31], mean[10*-rnge[0]-10*7:-10*rnge[1]+31]) - offset = -15 + np.argmax(cc) - interpoled_snip = interpoled_snip[15-offset:-15-offset] if offset != -15 else interpoled_snip[30:] - #plt.plot(interpoled_snip) - if len(interpoled_snip[~np.isnan(interpoled_snip)])>0: - aligned_snips[i] = interpoled_snip - #plt.show() - return snippets, aligned_snips - snips, aligned_snips = dta.cut_snippets(datx,peaks[0], 15, int_met = "cubic", int_fact = 10,max_offset = 1.5) - # snips, scaledsnips = cut_snippets(datx, peaks.list, [-15,15]) - #wpf = wpfeats(scaledsnips) - #print(wpf[0]) - #print('pc') - progressstr = 'Partstatus: '+ 'Part ' + '2'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - #print('len ', len(scaledsnips)) - #print(scaledsnips) - def pc_refactor(cutsnippets): - # (observations, features) matrix - M = np.empty([len(cutsnippets), len(cutsnippets[0])]) - for i, snip in 
enumerate(cutsnippets): - M[i] = snip[:] - from sklearn.preprocessing import StandardScaler - from sklearn.decomposition import PCA - #StandardScaler().fit_transform(M) - pca = PCA() - pc_comp= pca.fit_transform(M) - return pc_comp - print(aligned_snips) - # calculates principal components - pcs = dta.pc(aligned_snips)#pc_refactor(aligned_snips) - #print('dbscan') - - # clusters the features(principal components) using dbscan algorithm. clusterclasses are saved into the peak-object as Peak.pccl - order = 5 - minpeaks = 3 if deltat < 2 else 10 - def dbscan_refactor(pcs, peaks, order, eps, min_samples, takekm, olddatalen): - # pcs (samples, features) - # X (samples, features) - from sklearn.cluster import DBSCAN - from sklearn import metrics - from mpl_toolkits.mplot3d import Axes3D - from sklearn.cluster import AgglomerativeClustering - try: - X = pcs[:,:order] - except: - X = pcs[:,order] - # ############################################################################# - # Compute DBSCAN - db = DBSCAN(eps, min_samples).fit(X) - from sklearn.cluster import KMeans - core_samples_mask = np.zeros_like(db.labels_, dtype=bool) - core_samples_mask[db.core_sample_indices_] = True - labels = db.labels_ ##### TODO ###### --- irgendwo Indexfehler oder so, last change - pcs richtige DImension - #peaks = np.array([np.append(peaks[:,i],labels[i]) for i in range(len(peaks[0]))]) - peaks = np.append(peaks,[labels], axis = 0) - return peaks - - peaks = dta.cluster_events(pcs, peaks, order, 0.4, minpeaks, False, olddatalen, method = 'DBSCAN') - #peaks = dbscan_refactor(pcs, peaks, order, 0.4, minpeaks, False, olddatalen) - - #plotPCclasses_ref(peaks, datx) - olddatalen = len(datx) - num = 1 - #classlist = np.vectorize(lambda peak: peak.pccl, otypes=[object])(peaks.list) - #snips, scaledsnips = cut_snippets(datx, peaks.list[classlist == num], [-15,5]) - #pcs2 = pc(scaledsnips, peaks.list[classlist==num]) - #pcs2 = wpfeats(scaledsnips) - #dbscan(pcs2, peaks.list[classlist == num],4, 0.15, 15, False) - #print('Classify') - progressstr = 'Partstatus: '+ 'Part ' + '3'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - - # classifies the peaks using the data from the clustered classes and a simple amplitude-walk which classifies peaks as different classes if their amplitude is too far from any other classes' last three peaks - peaks, peaklist = dta.ampwalkclassify3_refactor(peaks, peaklist, thresh) # classification by amplitude - # print(peaks.classlist) - print(peaks) - bottletime.append(time.time()) #9 - join_count=0 - # while True and joincc(peaklist, peaks) == True and join_count < 200: - # join_count += 1 - # continue - # print(peaks.classlist) - bottletime.append(time.time()) #10 - - # discards all classes that contain less than mincl EODs - mincl = 6 # >=1 - peaks = smallclassdiscard(peaks, mincl) - bottletime.append(time.time()) #11 - - # discards peaks, that are too wide compared to their - # inter spike intervals and seem to be wavesfish signals - # actually... 
works in some cases - if len(peaks[0]) > 0: - peaks = discardwaves_refactor(peaks, datx) - - # plots the data part and its detected and classified peaks - if plot_steps == True: - plotampwalkclasses_refactored(peaks, datx) - #pass - - # map the analyzed EODs of the buffer part to the whole - # recording - worldpeaks = np.copy(peaks) - bottletime.append(time.time()) #13 - # change peaks location in the buffered part to the location relative to the - idx = 1 - # peaklocations relative to whole recording - worldpeaks[0] = worldpeaks[0] + (idx*nblock) - peaklist.len = nblock -# for p in worldpeaks: -# = idx*nblock + p.x - bottletime.append(time.time()) #14 - bottletime.append(time.time()) #15 - # extract the relevant information from each peakobject of - # the buffered part and rearrange it as numpy array for - # computational efficienty - #x = xarray(thisblock) - #y = yarray(thisblock) - #h = heightarray(thisblock) - #cllist = clarray(thisblock) - #bottletime.append(time.time()) #16 - #thisblock_eods = np.array([x,y,h, cllist]) - #bottletime.append(time.time()) #17 - #bottletime.append(time.time()) #18 - #thisblockeods_len = len(thisblock_eods[0,:]) - thisblock_eods = np.delete(peaks,3,0) - thisblockeods_len = len(thisblock_eods[0]) - progressstr = 'Partstatus: '+ 'Part ' + '4'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - - # save the peaks of the current buffered part to a numpy-memmap on the disk - if thisblockeods_len> 0 and save == 1 or save == 0: - if idx == 0: - eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='w+', shape=(4,thisblockeods_len), order = 'F') - # fp = np.memmap(filepath[:len(filename)]+"eods_"+filename[:-3]+"npy", dtype='float32', mode='w+', shape=(4,len(thisblock_eods[0,:]))) - dtypesize = 8#4 #float32 is 32bit = >4< bytes long ---changed to float64 -> 8bit - eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', offset = dtypesize*eods_len*4, shape=(4,thisblockeods_len), order = 'F') - eods[:] = thisblock_eods - eods_len += thisblockeods_len - bottletime.append(time.time()) #19 - #classes.extend(np.unique(cllist)) - - # to clean the plt buffer... 
- plt.close() - - # get and print the measured times of the algorithm parts for the - # current buffer - bottletime.append(time.time())#20 - time_a= bottletime[0] - for i, times in enumerate(bottletime): - #print('times: ' ,i, times-time_a) - time_a=times - - progressstr = 'Partstatus: '+ 'Part ' + '5'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - # plt.show() - - # after the last buffered part has finished, save the memory mapped - # numpy file of the detected and classified EODs to a .npy file to the - # disk - eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', shape=(4,eods_len), order = 'F') - print('before final saving: print unique eodcl: ' , np.unique(eods[3])) - if save == 1: - # #print('eods', eods[3]) - path = filename[:-4]+"/" - if not os.path.exists(path): - os.makedirs(path) - if eods_len > 0: - print('Saved!') - np.save(filename[:-4]+"/eods8_"+filename[:-3]+"npy", eods) - else: - #np.save(filename[:-4]+"/eods5_"+filename[:-3]+"npy", thisblock_eods) - print('not saved') - - else: # if there already has been a certain existing result file and 'new' was set to False - print('already analyzed') - - - # not used data implementation using NIX - # Save Data - - # Needed: - # Meta: Starttime, Startdate, Length - # x, y, h, cl, difftonextinclass -> freq ? , - - # Later: Find "Nofish" - # Find "Twofish" - # Find "BadData" - # Find "Freqpeak" - # ? Find "Amppeak" - # - - # bigblock = np.array(bigblock) - # x=xarray(bigblock) - # y=yarray(bigblock) - # cl=clarray(bigblock) - - - #nix file = nix.File.open(file_name, nix.FileMode.ReadWrite) - #nix b = file.blocks[0] - #nix nixdata = b.data_arrays[0] - #nix cldata = [] - #nix #print(classes) - #nix #print(b.data_arrays) - #nix for i in range(len(np.unique(classes))): - #nix cldata.append(b.data_arrays[i+1]) - - - # for cl in - - # for cl in - # x = thisfish_eods - - - #nix file.close() - -def path_leaf(path): - ntpath.basename("a/b/c") - head, tail = ntpath.split(path) - return tail or ntpath.basename(head) - -def fill_hidden(fishclasses): - - fishes = fishclasses - - nohidefishes = {} - for cl in fishes: - x =[] - y = [] - h = [] - fish = fishes[cl] - # #print('fish', fish) - fishisi = calcisi(fish) - isi = fishisi[0] - for i, newisi in enumerate(fishisi): - leftpeak = fish[i] - x.append(leftpeak.x) - y.append(leftpeak.y) - h.append(leftpeak.height) - if newisi > 2.8*isi: - guessx = leftpeak.x + isi - - while guessx < leftpeak.x + newisi-0.8*isi: - - peakx = peakaround(guessx, isi*0.1, fishes) - if peakx is not None: - x.append(peakx) - y.append(leftpeak.y) - h.append(leftpeak.height) - guessx = peakx+ isi + (peakx-guessx) - - continue - break - isi = newisi - nohidefishes[cl]= {'x':x,'y':y,'h':h} - return nohidefishes - -def plotheights(peaklist): - heights = heightarray(peaklist) - x_locations = xarray(peaklist) - plt.scatter(x_locations, heights) - plt.show() - -def ploteods(eods, data): - plt.plot(range(len(data)),data, color = 'black') - classlist = eods[3] - cmap = plt.get_cmap('jet') - colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) - np.random.seed(22) - np.random.shuffle(colors) - colors = [colors[cl] for cl in np.unique(classlist)] - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) - x=0 - if len(classlist)>0: - # #print(classlist) - # #print('classes: ' , np.unique(classlist)) - from collections import Counter - count = Counter(classlist) - # #print('longest class: ', count.most_common()[0]) - for num, color in 
zip(np.unique(classlist), colors): - peaksofclass = eods[:,:][:, classlist == num] - #xpred = linreg_pattern(peaksofclass[0:3]) - #for p in peaksofclass[0:3]: - # #print(p.x) - ##print(xpred, peaksofclass[3].x) - - #if len(peaksofclass) > 1000: - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) - #else: - plt.plot(peaksofclass[0], peaksofclass[1], '.', color = color, ms =20) - plt.show() - -def fill_hidden_3(fishes): - - fishes = fishes - - nohidefishes = {} - for cl, fish in fishes.items(): - x =[] - y = [] - h = [] - # fish = fishes[cl] passt net, fishes is np.array mit (cl, (xyh)) - fishisi = np.diff(fish[0]) - isi = fishisi[0] - for i, newisi in enumerate(fishisi): - leftpeak = i - x.append(fish[0][i]) - y.append(fish[1][i]) - h.append(fish[2][i]) - # #print(cl, fish[0][i], isi, newisi) - if newisi > 2.8*isi: - guessx = fish[0][i] + isi - - while guessx < fish[0][i] + newisi-0.8*isi: - - peakx = peakaround3(guessx, isi*0.1, fishes) - if peakx is not None: - # #print(jup) - x.append(peakx) - y.append(fish[1][i]) - h.append(fish[2][i]) - guessx = peakx+ isi + (peakx-guessx) - - continue - break - isi = newisi - nohidefishes[cl]= {'x':x,'y':y,'h':h} - - return nohidefishes - -def peakaround2(guessx, interval, fishes): - found = False - for cl, fish in fishes.items(): - for px in fish['x']: - distold = interval - if px < guessx-interval: - continue - # #print('in area', guessx-interval) - if guessx-interval < px < guessx+interval: - found = True - dist = px-guessx - if abs(dist) < abs(distold): - distold = dist - if px > guessx+interval: - if found == True: - # #print(guessx, dist) - return guessx + dist - else: break - return None - -def peakaround3(guessx, interval, fishes): - found = False - for cl, fish in fishes.items(): - for px in fish[0]: - distold = interval - if px < guessx-interval: - continue - # #print('in area', guessx-interval) - if guessx-interval < px < guessx+interval: - found = True - dist = px-guessx - if abs(dist) < abs(distold): - distold = dist - if px > guessx+interval: - if found == True: - # #print(guessx, dist) - return guessx + dist - else: break - return None - -def peakaround(guessx, interval, fishes): - found = False - for cl, fish in fishes.items(): - for peak in fish: - - distold = interval - if peak.x < guessx-interval: - continue - # #print('in area') - if guessx-interval < peak.x < guessx+interval: - found = True - dist = peak.x-guessx - if abs(dist) < abs(distold): - distold = dist - if peak.x > guessx+interval: - if found == True: - # #print(guessx, dist) - return guessx + dist - else: break - return None - -def fill_holes(fishes): #returns peakx, peaky, peakheight # Fills holes that seem to be missed peaks in peakarray with fake (X/Y/height)-Peaks - retur = {} - lost = {} - for cl, fish in fishes.items(): - fishisi = np.diff(fish['x']) - mark = np.zeros_like(fishisi) - isi = 0 - ##print('mark', mark) - # #print('fishisi' , fishisi) - #find zigzag: - c=0 - c0= 0 - n=0 - for i, newisi in enumerate(fishisi): - if abs(newisi - isi)>0.15*isi: - if (newisi > isi) != (fishisi[i-1] > isi): - c+=1 - # #print(abs(newisi - isi), 'x = ', fish[i].x) - c0+=1 - elif c > 0: - n += 1 - if n == 6: - if c > 6: - # print ('zigzag x = ', fish['x'][i-6-c0], fish['x'][i-6]) - mark[i-6-c0:i-6]= -5 - c = 0 - c0=0 - n = 0 - - #if c > 0: - # #print(i, c) - # if c == 6: - # #print('zigzag!') - isi = newisi - isi = 0 - for i, newisi in enumerate(fishisi): - ##print('mark: ' , mark) - if mark[i] == -5: continue - if i+2 >= len(fishisi): - continue 
- if (2.2*isi > newisi > 1.8*isi) and (1.5*isi>fishisi[i+1] > 0.5*isi) : - mark[i] = 1 - isi = newisi - # #print('found 1!' , i) - elif (2.2*isi > newisi > 1.8*isi) and (2.2*isi> fishisi[i+1] > 1.8*isi) and (1.5*isi > fishisi[i+2] > 0.5*isi): - mark[i] = 1 - isi = isi - elif 3.4*isi > newisi > 2.6*isi and 1.5*isi > fishisi[i+1] > 0.5*isi: - mark[i] = 2 - - elif (0.6* isi > newisi > 0): - # #print('-1 found', i ) - if mark[i] ==0 and mark[i+1] ==0 and mark[i-1]==0 : - # isi = newisi - # continue - # #print('was not already set') - if fishisi[i-2] > isi < fishisi[i+1]: - mark[i] = -1 - # #print('-1') - elif isi > fishisi[i+1] < fishisi[i+2]: - mark[i+1] = -1 - # #print('-1') - isi = newisi - filldpeaks = [] - x = [] - y = [] - h = [] - x_lost=[] - y_lost=[] - h_lost=[] - # #print('filledmarks: ', mark) - for i, m in enumerate(mark): - if m == -1 : - # #print('-1 at x = ', fish['x'][i]) - continue - if m == -5: - x_lost.append(fish['x'][i]) - y_lost.append(fish['y'][i]) - h_lost.append(fish['h'][i]) - x.append(fish['x'][i]) - y.append(fish['y'][i]) - h.append(fish['h'][i]) - continue - x.append(fish['x'][i]) - y.append(fish['y'][i]) - h.append(fish['h'][i]) - if m == 1: - # #print('hofly added peak at x = ' , fish['x'][i]) - x.append(fish['x'][i] + fishisi[i-1]) - y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) - h.append(0.5*(fish['h'][i]+fish['h'][i+1])) - elif m== 2: - x.append(fish['x'][i] + fishisi[i]) - y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) - h.append(0.5*(fish['h'][i]+fish['h'][i+2])) - x.append(fish['x'][i] + 2*fishisi[i-1]) - y.append( 0.5*(fish['y'][i]+fish['y'][i+2])) - h.append(0.5*(fish['h'][i]+fish['h'][i+2])) - # #print('added at x = ', fish['x'][i] + fishisi[i]) - retur[cl] = {'x':x,'y':y,'h':h} - lost[cl] = {'xlost':x_lost,'ylost':y_lost,'hlost':h_lost} - # filledpeaks =np.array(filledpeaks) - # #print(filledpeaks.shape) - # filledpeaks. - return retur, lost - -def calc_tsh_noise(peaks, data): - heights = np.vectorize(lambda peak: peak.height)(peaks) - # peakx = xarray(peaks) - # peakxlist = peakx.tolist() - # #print('datenstdanfang: ', np.std(data)) - # datatsh = np.mean(np.abs(data))# - # datatsh = 2* np.std(data) - # peakareas = [i for x in peakx for i in range(x-10, x+10) if (i < len(data))] - # peakareas = np.arange(peakx-10, peakx+10, 1) - # relevantdata = [] - #peakareas = np.unique(peakareas) - # #print(len(peakareas), len(data), ' len peakarea and data' , datatsh) - #relevantdata is the data without the areas around the peaks, to calculate the standard deviation of the noise - #c = 0 - tsh = 0.1*np.std(heights) - - #for i, dat in enumerate(data): - # if peakareas[c] == i and c dist: - # dist = tdist - #print('dist', dist) - if dist>=0: - valid = True - if olddatalen > 0: - alignlabels(labels, peaks, olddatalen) - for i, p in enumerate(peaklist): - pcclasses[peaknum] = labels[i] - return valid - if takekm: - km = KMeans(n_clusters=3, n_init = 3, init = 'random', tol=1e-5, random_state=170, verbose = True).fit(X) - core_samples_mask = np.zeros_like(km.labels_, dtype=bool) - labels = km.labels_ - if takekm: - for i, p in enumerate(peaklist): - # print('label ', labels[i]) - pcclasses[peaknum] = p.pccl - # Number of clusters in labels, ignoring noise if present. - n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) - #print('Estimated number of clusters: %d' % n_clusters_) - # ############################################################################# - # Plot result - # Black removed and is used for noise instead. 
- unique_labels = set(labels) - colors = [plt.cm.Spectral(each) - for each in np.linspace(0, 1, len(unique_labels))] - fig = plt.figure() - ax = fig.add_subplot(111, projection = '3d') - for k, col in zip(unique_labels, colors): - if k == -1: - # Black used for noise. - col = [0, 0, 0, 1] - class_member_mask = (labels == k) - xy = X[class_member_mask] - # print(col) - ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), - markeredgecolor='k', markersize=14) - ax.set_title('Estimated number of clusters: %d' % n_clusters_) - #plt.show() - - - from sklearn.neighbors import kneighbors_graph - knn_graph = kneighbors_graph(X, 15, include_self=False) - ac = AgglomerativeClustering(linkage = 'complete', n_clusters = 3, connectivity = knn_graph).fit(X) - core_samples_mask = np.zeros_like(ac.labels_, dtype=bool) - labels = ac.labels_ - if takekm: - for i, p in enumerate(peaklist): - print('label ', labels[i]) - pcclasses[peaknum] = labels[i] - # Number of clusters in labels, ignoring noise if present. - n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) - #print('Estimated number of clusters: %d' % n_clusters_) - # ############################################################################# - # Plot result - # Black removed and is used for noise instead. - unique_labels = set(labels) - colors = [plt.cm.Spectral(each) - for each in np.linspace(0, 1, len(unique_labels))] - fig = plt.figure() - ax = fig.add_subplot(111, projection = '3d') - for k, col in zip(unique_labels, colors): - if k == -1: - # Black used for noise. - col = [0, 0, 0, 1] - class_member_mask = (labels == k) - xy = X[class_member_mask] - print(col) - ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), - markeredgecolor='k', markersize=14) - ax.set_title('Estimated number of clusters: %d' % n_clusters_) - #plt.show() - -def ampwalkclassify3_refactor(peaks,peaklist): # final classificator - classamount = peaklist.classamount - # for i in range(start, len(peaks)-start): - lastofclass = peaklist.lastofclass # dict of a lists of the last few heightvalues of a class, f.E ((1,[0.7,0.68,0.71]), (5, [0.2, 0.21, 0.21])) - lastofclassx = peaklist.lastofclassx # dict of a list of the last few x-values of a class - a=0 - elem = 0 - thresholder = [] - comperr = 1 - classesnearby = peaklist.classesnearby # list of the classes of the last n peaks (currently 12) f.E:[1,2,1,2,1,3,2,1,...] - classesnearbyx = peaklist.classesnearbyx # list of the x-values of the last n peaks, f.E:[13300, 13460, 13587, 13690, 13701, ...] 
- classesnearbypccl = peaklist.classesnearbypccl # list of the pc-classified classes of the last n peaks - classes = np.zeros((len(peaks[0]))) - pcclasses = peaks[3] - positions = peaks[0] - heights = peaks[1] - - # #print('nearbyclasses at start:' ,classesnearby, classesnearbyx) - # for peak in peaks: - # peak.cl = peak.pccl+2 - # peaklist.classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) - # return peaks - cl = 0 - maxdistance = 30000 # Max distance to possibly belong to the same class - factor = 1.6 # factor by which a peak fits into a class, f.E: classheight = 1 , factor = 2 => peaks accepted in range (0.5,2) - c=0 - peakamount = len(peaks.T) - #fish = ProgressFish(total = peakamount) - for peaknum, p in enumerate(peaks.T): - perc = str((peaknum*100)//peakamount) - # fish.animate(amount = "", dexextra = 'Partstatus: '+ ' '*(3-len(perc)) +perc + ' % (' + ' '*(4-len(str(peaknum)))+str(peaknum) + '/' + ' ' *(4-len(str(peakamount)))+str(peakamount) + '), Filestatus:') - awc_btime = [] - if len(lastofclass) == 0: # Dict with all classes, containing the heights of the last few peaks - lastofclass[1] = deque() - lastofclassx[1]= deque() - lastofclass[1].append(heights[peaknum]) - lastofclassx[1].append(positions[peaknum]) - classesnearby.append(1) - classesnearbyx.append(-1) - classesnearbypccl.append(pcclasses[peaknum]) - classes[peaknum] = 1 - classamount += 1 - continue - time1 = time.time() - for i, cl in enumerate(classesnearby): - if (positions[peaknum]-classesnearbyx[i]) > maxdistance: - classesnearby.pop(i) - classesnearbyx.pop(i) - classesnearbypccl.pop(i) - lastofclassisis = [] - for i in classesnearby: - # print(i, classesnearby) - lastofclassisis.append(np.median(np.diff(lastofclassx[i]))) - meanisi = np.mean(lastofclassisis) - if 32000 > 20*meanisi> 6000: - maxdistance = 20*meanisi - #print(meanisi, maxdistance , 'maxdistance ----------------------------------------------------------------------------------------------') - - time2 = time.time() - awc_btime.append(time2-time1) #0 - cl = 0 # 'No class' - comperr = 1 - ##print('classesnearby at a peak', classesnearby) - clnrby = np.unique(classesnearby) - time1 = time.time() -# classmean = 0 - # if pcclasses[peaknum] == -1: - # factor = 1.2 - # else: - # factor = 1.6 - - for i in clnrby: - #print('cl: ', i) - # if classesnearbypccl[classesnearby.index(i)] == -1: - # factor = 2.2 - # else: factor = 1.6 - classmean = np.mean(lastofclass[i]) - logerror = np.abs(np.log2(heights[peaknum])-np.log2(classmean)) - abserror = np.abs(heights[peaknum]-classmean) - logthresh = np.log2(factor) - #ä#print(np.std(lastofclass[i])) absthresh = 0.5*classmean # #print('test log', np.abs(np.log2(np.array([0.4,0.5,1,1.5,2,2.4]))-np.log2(np.array([1,1,1,1,1,1]))) ) # abs(classmean*0.5) - #relerror = error - relerror = logerror - relabserror = abserror/thresh - # if 1140 < p.num < 1150: - # print(p.num) - # print('for classes at one peak: classmean, height, abserror, thresh', - # classmean,heights[peaknum], logerror, logthresh) - #print(len(classesnearbypccl), len(classesnearby)) - #print(classmean, heights[peaknum], logerror, logthresh, pcclasses[peaknum], classesnearbypccl[classesnearby.index(i)]) - if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum] or pcclasses[peaknum] == -1:# or - if logerror < logthresh: ## SameClass-Condition - if relerror < comperr and (positions[peaknum]-classesnearbyx[classesnearby.index(i)]) 2*compareisierror: -# cl = holdlastcl - - time2 = time.time() - awc_btime.append(time2-time1) 
#1 - time1 = time.time() - if pcclasses[peaknum] != -1: - if cl != 0 : - #print(cl) - if len(lastofclass[cl]) >= 3: - lastofclass[cl].popleft() - if len(lastofclassx[cl]) >= 3: - lastofclassx[cl].popleft() - lastofclass[cl].append(heights[peaknum]) - lastofclassx[cl].append(positions[peaknum]) - classes[peaknum] = cl - else: # Add new class - cl = classamount+1 - #print('existingclasses: ', classamount) - classamount = cl - - #print('newclass: ----------------------------------------------------------------', cl) - lastofclass[cl] = deque() - lastofclassx[cl] = deque() - lastofclass[cl].append(heights[peaknum]) - lastofclassx[cl].append(positions[peaknum]) - classes[peaknum] = cl - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - ##print('tatsaechlich: ', cl) - if len(classesnearby) >= 12: #kacke implementiert? - minind = classesnearbyx.index(min(classesnearbyx)) - del lastofclass[classesnearby[minind]] - del lastofclassx[classesnearby[minind]] - #print(classesnearby[minind], 'del') - classesnearby.pop(minind) - classesnearbyx.pop(minind) - classesnearbypccl.pop(minind) - # for ind, clnrby in enumerate(reversed(classesnearby)): - # classesnearbyx - # del lastofclass[classesnearby[ind]] - # # del lastofclassx[classesnearby[minind]] - # classesnearby.pop(minind) - # classesnearbyx.pop(minind) - try: - ind=classesnearby.index(cl) - classesnearbyx[ind] = positions[peaknum] - # #print(ind ,' --------------------------------------here -----------------------------') - except ValueError: - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - else: - if cl != 0: - classes[peaknum] = cl - else: - cl = classamount+1 - #print('existingclasses: ', classamount) - classamount = cl - #print('newclass: ', cl) - lastofclass[cl] = deque() - lastofclassx[cl] = deque() - lastofclass[cl].append(heights[peaknum]) - lastofclassx[cl].append(positions[peaknum]) - classes[peaknum] = cl - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - if len(classesnearby) >= 12: #kacke implementiert? 
- minind = classesnearbyx.index(min(classesnearbyx)) - del lastofclass[classesnearby[minind]] - del lastofclassx[classesnearby[minind]] - #print(classesnearby[minind], 'del') - classesnearby.pop(minind) - classesnearbyx.pop(minind) - classesnearbypccl.pop(minind) - # for ind, clnrby in enumerate(reversed(classesnearby)): - # classesnearbyx - # del lastofclass[classesnearby[ind]] - # # del lastofclassx[classesnearby[minind]] - # classesnearby.pop(minind) - # classesnearbyx.pop(minind) - try: - ind=classesnearby.index(cl) - classesnearbyx[ind] = positions[peaknum] - # #print(ind ,' --------------------------------------here -----------------------------') - except ValueError: - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - # #print('classesnearby after a peak', classesnearby) - # for clnum, cls in enumerate(classesnearby): ## deleting almost identical classes (< % difference in amplitude) - # if cls == False: - # continue - # if True: - # continue - # compare = np.mean(lastofclass[cls]) - # for i in classesnearby[clnum:-1]: - # if i== False: - # continue - # if i != cls and abs(compare - np.mean(lastofclass[i])) < compare*0.01: ## - # # #print(compare) - # # #print( np.mean(np.vectorize(lambda peak: peak.height)(lastofclass[i]))) - # clindex = classesnearby.index(cls) - # classesnearby[clindex] = False - # classesnearbyx[clindex] = False - # del lastofclass[cls] - # del lastofclassx[cls] - # # cl = holdlastcl - # # if cl == cls: - # - # - # #print('combinedsomeclasses that were similar', cl, cls) - time2 = time.time() - # awc_btime.append(time2-time1) #2 - # classesnearby = [cls for cls in classesnearby if cls != False] - # classesnearbyx = [clx for clx in classesnearbyx if clx != False] - # - # - #print('awc_btime ', awc_btime , ' newpeak-------------------------------------------------------- :') - peaklist.lastofclass = lastofclass - peaklist.lastofclassx = lastofclassx - peaklist.classesnearby = classesnearby - peaklist.classesnearbyx = classesnearbyx - peaklist.classlist = classes # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) - peaklist.classamount = classamount - peaks = np.append(peaks,classes[None,:], axis = 0) - return peaks, peaklist - -def joincc(peaklist,peaks): - # connects classes that appear after each other... 
- # peaklist = peaks.list - joinedsome = False - classlist = peaks[4] - peaksofclass = {} - last = [] - connect = {} #connect classes in connect+ - classcount = dict.fromkeys(classlist, 0) - ##print(classcount) - #classcount = [0]*len(np.unique(classlist)) - # #print(np.unique(classlist)) - for cl in np.unique(classlist): - peaksofclass[cl]= peaks[:,classlist == cl] - for i in range(len(peaks[0])): # i is the increasing index of the peaks - p = peaks[:,i] - poc = peaksofclass[p[4]] - classcount[p[4]]+=1 - countclass = p[4] #the current class before it might be changed to the connected class - if p[4] in connect: - p[4] = connect[p[4]] #peakclass is changed to connected class - # #print('changed ', countclass, 'to', p.cl) - joinedsome = True - - if len(poc) == classcount[countclass]: #the current peak is last peak of its class - last = poc[-len(poc) if len(poc) <= 5 else 5:] #the last peaks of the class - # #print('last: ', last) - #mean_last = np.mean(np.vectorize(lambda peak: peak[2])(last)) - mean_last = np.mean(last[2,:]) - nextfirst = {} # the first peaks of the next coming class(es) - # #print('class: ', countclass, 'at x = ', p.x, 'mean_last: ', mean_last) - for nexti in range(20): # the next 10 peaks are considered if they belong to the same classe - if i + nexti >= len(peaks[0]): break - inextp = peaks[:,i+nexti] - if classcount[inextp[4]] == 0: #current peak is first peak of its class - # #print('found a new begin! its class:' , inextp.cl) - ponc = peaksofclass[inextp[4]] # - nextfirst[inextp[4]] = ponc[0:len(ponc) if len(ponc) <= 5 else 5] - # #print(np.mean(np.vectorize(lambda peak: peak.height)(nextfirst[inextp.cl]))) - # #print(nextfirst) - compare = 1 - c = 0 - nextclass = -1 - for nextcl, first in nextfirst.items(): - mean_nextfirst = np.mean(first[2,:])#np.mean(np.vectorize(lambda peak: peak.height)(first)) - # #print(mean_nextfirst) - error = abs(mean_nextfirst - mean_last)/(mean_nextfirst) - if error < 1: - if compare < error: - continue - compare = error - if nextcl in connect: #if the peak that ist considered belongs to a class, that is already supposed to be connected to the current class - pocc = peaksofclass[connect[nextcl]] #peaks of the currently supposed connected class - if ( abs(mean_nextfirst - np.mean(pocc[-len(pocc) if -len(pocc) <= 5 else 5:][2])) - < abs(mean_nextfirst - mean_last) ): - continue - nextclass = nextcl - if nextclass != -1: - connect[nextclass] = p[4] - # #print('connect ', p.cl , ' and ', nextcl) - for cl in peaklist.classesnearby: - if cl in connect: - # #print('cl, connect', cl, connect[cl]) - peaklist.classesnearby[peaklist.classesnearby.index(cl)] = connect[cl] - peaklist.lastofclass[connect[cl]]=peaklist.lastofclass[cl] - peaklist.lastofclassx[connect[cl]]= peaklist.lastofclassx[cl] - peaklist.classlist = peaks[4] - return joinedsome - # for poc in peaksofclass: - # if len(poc) >= 3: - # newlast = poc[-3:] - # first = poc[:3] - # else: - # newlast = poc[-len(poc):] - # first = poc[:len(poc)] - # if last != []: - # if abs(np.mean(first) - np.mean(last)) < 0: - # #print('oh') - -def discardwaves_refactor(peaks, data): - - deleteclasses = [] - for cl in np.unique(peaks[3]): - peaksofclass = peaks[:,peaks[3] == cl] - isi = np.diff(peaksofclass[0]) - isi_mean = np.mean(isi) - # #print('isismean',isi_mean) - widepeaks = 0 - # #print('width',peaksofclass[2].width) - isi_tenth_area = lambda x, isi:np.arange(np.floor(x-0.1*isi),np.ceil(x+0.1*isi),1, dtype = np.int) - for p in peaksofclass.T: - data = np.array(data) - try: - for dp_around in 
data[isi_tenth_area(p[0],isi_mean)]:#np.floor(p[0]-0.1*isi_mean), np.ceil(p[0]+0.1*isi_mean),1)]:# - if dp_around <= p[1]-p[2]: - break - except IndexError: - pass - else: - widepeaks+=1 - ## p.isreal_pleateaupeaks() - if widepeaks > len(peaksofclass)*0.5: - deleteclasses.append(cl) - for cl in deleteclasses: - peaks = peaks[:,peaks[3]!=cl] - return peaks - -def smallclassdiscard(peaks, mincl): - classlist = peaks[3] - smallclasses = [cl for cl in np.unique(classlist) if len(classlist[classlist - == cl]) < - mincl] - delete = np.zeros(len(classlist)) - for cl in smallclasses: - delete[classlist == cl] == 1 - peaks = peaks[:,delete != 1] - return peaks - -def makepeak(data_x,cutsize, maxwidth, peakx, ltr, data_ltr, rtr, data_rtr, num, minhlr): - #if len(data) > peakx + cutsize/2: - return Peak(peakx, data_x, maketr(data_ltr, ltr), maketr(data_rtr, rtr), maxwidth, num, minhlr)#data[peakx-cutsize/2:peakx+cutsize/2], num) - #else: - # return Peak(peakx, data[peakx], - # maketr(data, ltr), - # maketr(data, rtr), - # maxwidth, - # #data[peakx-cutsize/2:-1], - # num) - -def maketr(data_x, x): - if x is not None: - return Tr(x,data_x) - else: - return None - -def makepeaklist(pkfirst, data, pk, tr, cutsize, maxwidth): - peaklist = np.empty([len(pk)], dtype = Peak) - trtopk = pkfirst - pktotr = 1-pkfirst - trlen = len(tr) - pklen = len(pk) - minhlr = lambda i, mwl, mwr : min( - abs( data[pk[i]] - min( data[pk[i]-mwl:pk[i]] ) if len(data[pk[i]-mwl:pk[i]]) > 0 else 0 ) - , - abs( data[pk[i]]- min( - data[pk[i]:pk[i]+mwr] ) if len(data[pk[i]:pk[i]+mwr]) > 0 else 0 ) - ) - #print(min( data[pk[0]-0:pk[2]]) ) - - if pktotr == 0: - peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], None, None, tr[pktotr], data[pktotr], 0, minhlr(0, 0, maxwidth)) - else: - peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], - tr[-trtopk], - data[-trtopk], tr[pktotr], data[pktotr], - 0, minhlr(0, min(maxwidth, - pk[0]-tr[-trtopk]) , maxwidth)) - for i in range(1,pklen-1): - peaklist[i] = makepeak(data[pk[i]], cutsize, maxwidth, pk[i], tr[i-trtopk], data[tr[i-trtopk]], tr[i+pktotr],data[tr[i+pktotr]], i, minhlr(i, maxwidth, maxwidth)) - if pktotr == 0 and pklen <= trlen: - peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1], data[pklen-trtopk-1], tr[pklen+pktotr-1], data[pklen+pktotr-1], i, minhlr(pklen-1, maxwidth, min(maxwidth, tr[pklen+pktotr-1]-pk[pklen-1]))) - else: - peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1],data[pklen-trtopk-1], None, None, pklen-1, minhlr(pklen-1, maxwidth, 0)) - return peaklist - -#def doublepeaks(peaks, peakwidth): -# dif2 = peaks[1].x-peaks[0].x -# if dif2 > 5* peakwidth: -# peaks[0].real = False -# for i in range(1,len(peaks)-1): -# dif1 = dif2 -# dif2 = peaks[i+1].x-peaks[i].x -# if dif1 > 5* peakwidth and dif2 > 5* peakwidth: -# peaks[i].real = False -# if dif2 > 5* peakwidth: -# peaks[len(peaks)-1] = False -# return peaks - -def discardunrealpeaks(peaklist): - peaks = peaklist[:][np.vectorize(lambda peak: peak.real, otypes=[object])(peaklist) == True] - for i, p in enumerate(peaks): - pass - # p.num = i - return peaks - -def discardnearbypeaks(peaks, peakwidth): - peaksx = xarray(peaks) - pkdiff = np.diff(peaksx) - # peakwidth = avg_peakwidth(pknum,tr) - pknumdel= np.empty(len(peaksx)) - pknumdel.fill(False) -# peaksy = yarray(peaks) - peaksh = heightarray(peaks) - for i,diff in enumerate(pkdiff): - # #print(peaks[i].height) - if diff < peakwidth: #* peaks[i].height: ### Trial 
Error - if peaksh[i+1] > 1.01 *peaksh[i] : - pknumdel[i] = True - else: - # print(peaksh[i],peaksh[i+1]) - pknumdel[i+1] = True - peaks = peaks[pknumdel!=True] - for i, p in enumerate(peaks): - p.num = i - return peaks - -def interpol(data, kind): - #kind = 'linear' , 'cubic' - width = len(data) - x = np.linspace(0, width-1, num = width, endpoint = True) - return interp1d(x, data[0:width], kind , assume_sorted=True) - -def cutcenter(peak): - p = peak - cut = p.cut - pl=p.distancetoltr - pr=p.distancetortr - if pl is None: - pl = 10 - tx = p.x-10 - else: tx = p.ltr.x - if pr is None: - pr = 10 - if pl < p.maxwidth and pr > 1: - - width=len(cut) - # #print('distancetoltr',pl) - peakshape = cut - interpolfreq = 1 - xnew = np.linspace(0,len(peakshape)-1, len(peakshape)*interpolfreq, endpoint= True) - curvyf = interpol(peakshape) - curvy= curvyf(xnew) - #px = p.cutsize/2 * 4 - #left = px - (5*4) - #plt.plot(xnew, curvy) - #x_0 = optimize.fsolve(curvyf, 1.0) - # f = interp1d(x, y) - # f2 = interp1d(range(width), data[x:x+width], kind='cubic') - ##xnew = np.linspace(0, width-1, num = width*4, endpoint = True) - ##print(xnew) - # plt.plot(xnew,f2(xnew)) - ##print("show") - #plt.show - trx = (p.cutsize/2 - (p.x - tx) ) - if trx >0 : - xstart = trx - else: - xstart = 0 - # #print('pkx: ', p.x, 'ltrx: ', p.ltr.x) - # #print('trx in intpol', x) - x = xstart - if curvyf(x) < 0: - left = 0 - right= 0 - while(x < width-1 and curvyf(x) < 0) : - left = x - # #print(curvyf(x)) - x+=0.25 - right = x - # #print('x: ', x , 'left, right: ', curvyf(left), curvyf(right)) - x = left+(1-curvyf(right)/(curvyf(right)-curvyf(left)))*1/interpolfreq - # #print(x) - else: - x = 0 - # #print(x_int) - # plt.scatter(xstart, curvyf(xstart), marker = 'x', s=150, zorder=2, linewidth=2, color='red') - # plt.scatter(x, curvyf(x), marker='x', s=150, zorder=2, linewidth=2, color='black') - # plt.show - # #print(x_int) - #p.relcutcenter = (p.ltr.x + x_int)-p.x - ##print('cent',p.relcutcenter) - #return (p.ltr.x + x_int)-p.x - - # while(data[x]>0) - else: - x= 0 - - return x - -def relcutarray(peaks): - return np.vectorize(lambda peak: peak.relcutcenter)(peaks) - -def xarray(peaks): - if len(peaks)>0: - peakx = np.vectorize(lambda peak: peak.x)(peaks) - return peakx - else: return [] - -def yarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.y)(peaks) - else: return [] - -def heightarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.height)(peaks) - else: return [] - -def clarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.cl)(peaks) - else: return [] -def pcclarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.pccl)(peaks) - else: return [] - -def peakxarray( ): - peakx = np.empty([len]) - peakx = np.vectorize(lambda peak: peak.x)(peaks) - return peakx - -def peakyarray( ): - peaky= np.empty([len]) - return np.vectorize(lambda peak: peak.y)(peaks) - - -def classify( ): - #template = peaks[0] - meanfit = np.mean(np.vectorize(fit, otypes=[object])(template,peaks)) - for p in peaks: - if fit(template,p) < meanfit: - # #print('classified ', fit(template,p) , ' meanfit: ' , meanfit) - p.currentclass = 1 - -def classifyhiker(template, peaks): - meanfit = np.mean(np.vectorize(fitinterpol2, otypes=[object])(template,peaks)) - #toclassify = peaks.tolist() - firstnot = 0 - for c in range(1,5): - first = True - template = peaks[firstnot] - for i, p in enumerate(peaks[firstnot:]): - if p.currentclass == 0: - if fitinterpol2(template,p) < meanfit: - # 
#print('peak number ' , i, 'classified as ', c, fit(template,p) , ' meanfit: ' , meanfit) - p.currentclass = c - template = p - elif first == True: - # #print('peak number ' , i, 'classified as First! ', c, fit(template,p) , ' meanfit: ' , meanfit) - firstnot = i - first = False - else: - None - ##print('peak number ' , i, 'classified as not classified!', fit(template,p) , ' meanfit: ' , meanfit) - return peaks - - - # def Templatefitnext( , number, templnum): - # for p in peaks: - # if fit(peaks[templnum], p) < fitparameter: - -def cut_snippets(data, peaklist, rnge): - snippets = [] - positions = xarray(peaklist) - heights = heightarray(peaklist) - for pos in positions: - snippets.append(data[(pos+rnge[0]):(pos+rnge[1])]) - scaledsnips = np.empty_like(snippets) - for i, snip in enumerate(snippets): - top = -rnge[0] - # plt.plot(snip) - scaledsnips[i] = snip * 1/heights[i] - #plt.plot(scaledsnips[i]) - # print('plted') -# plt.show() - #print('1') - alignedsnips = np.empty((len(snippets), (rnge[1]-rnge[0])*10-30-10)) - standardized = np.empty((len(snippets), (rnge[1]-rnge[0])*10-10)) - intfact = 10 - for i, snip in enumerate(scaledsnips): - if len(snip) < ((rnge[1]-rnge[0])): - if i == 0: - snip =np.concatenate([np.zeros([((rnge[1]-rnge[0]) - len(snip))]),np.array(snip)]) - if i == len(scaledsnips): - snip = np.concatenate([snip, np.zeros([((rnge[1]-rnge[0])-len(snip))])]) - else: - # print('this') - snip = np.zeros([(rnge[1]-rnge[0])]) - interpoled_snip = interpol(snip)(np.arange(0, len(snip)-1, 1/intfact)) if len(snip) > 0 else np.zeros([(rnge[1]-rnge[0]-1)*intfact ]) #interpolfactor 10 - - intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) - if intsnipheight == 0: - intsnipheight = 1 - interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight - standardized[i] = interpoled_snip - #print('2') - mean = np.mean(standardized, axis = 0) - #plt.plot(mean) -# plt.show() - #plt.plot(mean[10*-rnge[0]-10*5:-10*rnge[1]+21]) -# plt.show() - meantop = np.argmax(mean) - for i, snip in enumerate(standardized): - #plt.show() - interpoled_snip = snip #standardized[i] - cc = crosscorrelation(interpoled_snip[15:-15], mean) - #cc = crosscorrelation(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31], mean[10*-rnge[0]-10*7:-10*rnge[1]+31]) - #plt.plot(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31]) - #top = np.argmax(interpoled_snip) - #offset = meantop - top - #if not(-15 <= offset <= 15): offset = 0 - offset = -15 + np.argmax(cc) - interpoled_snip = interpoled_snip[15-offset:-15-offset] if offset != -15 else interpoled_snip[30:] - #print(offset) - #plt.plot(interpoled_snip) - if len(interpoled_snip[~np.isnan(interpoled_snip)])>0: - alignedsnips[i] = interpoled_snip - #plt.show() - # print('3') - return snippets, alignedsnips - - - -def fit(templ, peak): - fit = np.sum(np.square(templ.cut - peak.cut)) - return fit - -def fitinterpol2(templ,peak): - t = templ - p = peak - if p.real and t.real: - fit = np.sum(np.square(t.cutaligned-p.cutaligned)) - else: - fit = 0 - return fit - - - -def fitinterpol( templ, peak): - t = templ - p = peak - if p.real: - centerp = cutcenter(p) - centert = cutcenter(t) - shiftp = centerp-p.cutsize/2 - shiftt = centert-t.cutsize/2 - - if shiftp > -5: - shiftp = min(5, 5+centerp-p.cutsize/2) - else: shiftp = 0 - - if shiftt > -5: - shiftt = min(5, 5+centert-t.cutsize/2) - else: shiftt = 0 - - xnew = np.linspace(0,p.cutsize-11, (p.cutsize-1) * 4,endpoint = True) - #peak_interpoled = interpol(p.cut)(xnew) - #plt.plot(xnew, 
interpol(p.cut)(xnew+shift)) - # #print(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp)) - fit = np.sum(np.square(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp))) - else: - fit = 0 - return fit - - -def plotdata(peaks, data): - x = xarray(peaks) - y = yarray(peaks) - plt.plot(range(len(data)),data) - plt.plot(x, y, '.r', ms=20) - #for p in peaks: - # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) - # plt.plot(tr, data[tr], '.g', ms=20) - plt.show() - - -def plotdatabyx(peaksx, data): - x = peaksx - y = data[peaksx] - plt.plot(range(len(data)),data) - plt.plot(x, y, '.r', ms=20) - plt.show() - #for p in peaks: - # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) - # plt.plot(tr, data[tr], '.g', ms=20) - -def plotpeak(peaks): - #plt.plot(peaks), cutpeaks) #bei betrachtung aller blocks zu groß! - for p in peaks: - plt.plot(range(p.cutsize),p.cut) - #plt.plot(pk, x[pk] , '.r', ms=20) - plt.show() - - -def periodicinclass(peaks, cl): - noiselist = [] - classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaks) - peaks = xarray(peaks) - peaks = peaks[:][classlist == cl] - periodic = [] - periodiccollector = [] - error2 = [] - isperiodic = True - b=1 - c=2 - ctofar = False - compdif = 0 - dif = 0 - count = 1 - foundtriple = False - next = 0 - for i in range(len(peaks)-1): - if i != next: continue - # #print(i, 'foundtriple', foundtriple) - error2 = [] - b=1 - c=0 - A = peaks[i] - B = peaks[i+b] - compdif = dif - while foundtriple == True and count <= 3 and i+1 < len(peaks)-1: - while B-A < compdif*1.5 and i+b+1 < len(peaks)-1: - # #print('newdif: ', B-A, 'olddif:' , dif) - if abs((B-A) - compdif) < compdif*0.4: - error2.append(abs((B-A) - dif)) - b+=1 - B = peaks[i+b] - if len(error2) > 0: - bestB = error2.index(min(error2)) - B = peaks[i+1 + bestB] - periodic.append(B) - dif = 0.5*(dif + (B-A)) - # #print('match found') - b = 1+bestB - break - else: - count+=1 - compdif = dif*count - else: - if foundtriple == True: - # #print('no further match found, ') - isperiodic = False - - - - - while foundtriple == False and i+c< len(peaks)-1: - while i+c < len(peaks)-1: - A = peaks[i] - B = peaks[i+b] - C = peaks[i+c] - dif1 = B - A - dif2 = C - B - if (C-B > (B-A)*1.5): - break - if abs(dif1 - dif2) < dif1*0.4: - error2.append(abs(dif1-dif2)) - c +=1 - #C = peaks[i+c] # C weiterlaufenlassen, bis zu weit - else: - if len(error2) == 0: - # #print('no triple found') - isperiodic = False - if len(error2) > 0: - bestC = error2.index(min(error2)) - C = peaks[i+2 + bestC] - c = 2+ bestC - periodic.extend((A,B,C)) - dif1 = B - A - dif2 = C - B - # #print('dif1: ', dif1, 'dif2: ', dif2) - dif = 0.5*(dif2+dif1) - foundtriple = True - # #print('triple found', i+c, 'dif : ', dif) - else: - error2 = [] # B weiterlaufen lassen, C reset auf B+1 - b +=1 - c = b+1 - - if isperiodic == False: - if len(periodic) > 3: - periodiccollector.append(periodic) - isperiodic = True - periodic = [] - if c!=0: - next = i+c - else: - next = i+b - if len(periodiccollector) > 0: - # for i in range(len(periodiccollector)): - # #print('collector ', i, periodiccollector[i]) - return periodiccollector - else: - #print('no periodicity found') - return [] - - - -def noisediscard(peaklist, tsh_n, ultimate_threshold): - detected_noise = False - ##print('noisetsh: ', tsh_n) - for p in peaklist.list: - - if p.height < tsh_n or p.height < ultimate_threshold: - p.noise = True - detected_noise = True - peaklist.list = 
peaklist.list[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaklist.list) == False] - # #print(peaks) - # for cl in classlist: - # diff = np.vectorize(lambda peak: peak.x, otypes=[object])(peaks[:][classlist == cl]) - # meandiff = np.mean(diff) - # msecompare = np.mean(np.square(diff-(diff*0.8))) - # mse = np.mean(np.square(diff-meandiff)) - # if mse > msecompare: - # noiselist.append(cl) - # for p in peaks: - #if p.cl in noiselist: - # if p.height < 0.1: - # p.noise = True - # peaks = peaks[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaks) == False] - # return peaks - return detected_noise - - -def plotPCclasses_ref(peaks, data): - plt.plot(range(len(data)),data, color = 'black') - print(peaks) - classlist = np.array(peaks[3],dtype = 'int') - cmap = plt.get_cmap('jet') - colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) - np.random.seed(22) - np.random.shuffle(colors) - colors = [colors[cl] for cl in np.unique(classlist)] - print('classlist', np.unique(classlist)) - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) - # x=0 -# if len(classlist)>0: - # #print(classlist) - # #print('classes: ' , np.unique(classlist)) - #from collections import Counter - #count = Counter(classlist) - # #print('longest class: ', count.most_common()[0]) - for num, color in zip(np.unique(classlist), colors): - if num == -1 : - color = 'black' - peaksofclass = peaks[:,classlist == num] - #xpred = linreg_pattern(peaksofclass[0:3]) - #for p in peaksofclass[0:3]: - # #print(p.x) - ##print(xpred, peaksofclass[3].x) - #if len(peaksofclass) > 1000: - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) - #else: - print(num) - plt.plot(peaksofclass[0], peaksofclass[1], '.', color = color, ms =20) - #plt.scatter(peaks[0], peaks[2]) - # for p in peaks: - # plt.text(p.x, p.y, p.num) - #plt.show() - - print('show pcclasses') - plt.show() - plt.close() - -def plotampwalkclasses_refactored(peaks, data): - plt.plot(range(len(data)),data, color = 'black') - classlist = np.array(peaks[3],dtype=np.int) - cmap = plt.get_cmap('jet') - colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) - np.random.seed(22) - np.random.shuffle(colors) - colors = [colors[cl] for cl in np.unique(classlist)] - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) - # x=0 -# if len(classlist)>0: - # #print(classlist) - # #print('classes: ' , np.unique(classlist)) - #from collections import Counter - #count = Counter(classlist) - # #print('longest class: ', count.most_common()[0]) - for cl, color in zip(np.unique(classlist), colors): - peaksofclass = peaks[:,classlist == cl] - #xpred = linreg_pattern(peaksofclass[0:3]) - #for p in peaksofclass[0:3]: - # #print(p.x) - ##print(xpred, peaksofclass[3].x) - - #if len(peaksofclass) > 1000: - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) - #else: - - plt.plot(peaksofclass[0],peaksofclass[1], '.', color = color, ms =20) - plt.scatter(peaksofclass[0], peaksofclass[2]) - # for p in peaks: - # plt.text(p.x, p.y, p.num) - plt.show() - - # plt.show() - plt.close() - - -def crosscorrelation(sig, data): - autocorr = signal.fftconvolve(data, sig[::-1], mode='valid') - return autocorr - -def plottemplatefits(data, peaks, tr, templnum): - # - plotdata(peaks, data, tr) - plt.plot(range(len(data)),data) - classes = np.vectorize(lambda peak: peak.currentclass, otypes=[object])(peaks) - class1 = peaks[:][classes == 1 ] - if len(class1) > 0: - 
plt.plot(xarray(class1), yarray(class1), '.r', ms=20) - class2 = peaks[:][classes == 2 ] - if len(class2) > 0: - plt.plot(xarray(class2), yarray(class2), '.g', ms=20) - class3 = peaks[:][classes == 3 ] - if len(class3) > 0: - plt.plot(xarray(class3), yarray(class3), '.c', ms=20) - class4 = peaks[:][classes == 4 ] - if len(class4) > 0: - plt.plot(xarray(class4), yarray(class4), '.y', ms=20) - - # for p in peaks: # <-- - # plt.text(p.x , p.y, p.num) - - # plt.plot(tr, data[tr], '.g', ms=20) - plt.show() - -def linreg_pattern(peaks): - from sklearn import datasets, linear_model - from sklearn.metrics import mean_squared_error, r2_score - - peaksx = xarray(peaks) - peaksx = peaksx.reshape(-1,1) - #peaksh = heightarray(peaks) - #peakx = peak.x - # Create linear regression object - regr = linear_model.LinearRegression() - numbers = np.arange(len(peaks)).reshape(-1,1) - # Train the model using the training sets - regr.fit(numbers, peaksx) - - # Make predictions using the testing set - peakx_pred = regr.predict(len(peaks)) - # # The coefficients - # #print('Coefficients: \n', regr.coef_) - # # The mean squared error - # #print("Mean squared error: %.2f" - # % mean_squared_error(diabetes_y_test, diabetes_y_pred)) - # # Explained variance score: 1 is perfect prediction - # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) - - - # Plot outputs - #plt.scatter(peaksx, peaksh, color='black') - #plt.scatter(peakx, peakh_pred, color='blue') - - #plt.xticks(()) - #plt.yticks(()) - - # plt.show() - - return peakx_pred - -def linreg(peaks, peak): - from sklearn import datasets, linear_model - from sklearn.metrics import mean_squared_error, r2_score - - peaksx = xarray(peaks) - peaksx = peaksx.reshape(-1,1) - peaksh = heightarray(peaks) - peakx = peak.x - # Create linear regression object - regr = linear_model.LinearRegression() - - # Train the model using the training sets - regr.fit(peaksx, peaksh) - - # Make predictions using the testing set - peakh_pred = regr.predict(peakx) - - # # The coefficients - # #print('Coefficients: \n', regr.coef_) - # # The mean squared error - # #print("Mean squared error: %.2f" - # % mean_squared_error(diabetes_y_test, diabetes_y_pred)) - # # Explained variance score: 1 is perfect prediction - # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) - - - # Plot outputs - #plt.scatter(peaksx, peaksh, color='black') - #plt.scatter(peakx, peakh_pred, color='blue') - - #plt.xticks(()) - #plt.yticks(()) - - # plt.show() - - - - return peakh_pred - -def wp_transform(x): - import pywt - wp = pywt.WaveletPacket(data=x, wavelet='haar', mode='symmetric') - print('maxlevel: ', wp[''].maxlevel) - return (np.array([node.data for node in wp.get_level(wp[''].maxlevel, 'freq')])).flatten() - -def wpfeats(snips): - size = len(wp_transform(snips[0])) - wp = np.empty([len(snips), size]) - for i, snip in enumerate(snips): - print(wp_transform(snip)) - wp[i] = (wp_transform(snip)) - #wp = wp.T - print(wp[0]) - wpcoef = wp.T - print(wp[0]) - from sklearn.preprocessing import StandardScaler - wpcoef = StandardScaler().fit_transform(wpcoef) - coeffvalues = [] - for coeff in wpcoef: - stat, crit, sig = stats.anderson(coeff, dist = 'norm') - # coeffvalues.append(stat) - coeffvalues.append(np.sum(np.abs(coeff))) - coeffvalues = np.array(coeffvalues) - coeffs = np.argsort(coeffvalues)[::-1][:10] - print(coeffvalues[coeffs]) - return wp.T[coeffs] - - - - -def pc(cutsnippets, peaklist): - # (observations, features) matrix - M = np.empty([len(cutsnippets), 
len(cutsnippets[0])]) - for i, snip in enumerate(cutsnippets): - M[i] = snip[:] - from sklearn.preprocessing import StandardScaler - StandardScaler().fit_transform(M) - # #print(M.shape, ' Mshape') - # singular value decomposition factorises your data matrix such that: - # - # M = U*S*V.T (where '*' is matrix multiplication) - # - # * U and V are the singular matrices, containing orthogonal vectors of - # unit length in their rows and columns respectively. - # - # * S is a diagonal matrix containing the singular values of M - these - # values squared divided by the number of observations will give the - # variance explained by each PC. - # - # * if M is considered to be an (observations, features) matrix, the PCs - # themselves would correspond to the rows of S^(1/2)*V.T. if M is - # (features, observations) then the PCs would be the columns of - # U*S^(1/2). - # - # * since U and V both contain orthonormal vectors, U*V.T is equivalent - # to a whitened version of M. - - U, s, Vt = np.linalg.svd(M, full_matrices=False) - V = Vt.T - - # PCs are already sorted by descending order - # of the singular values (i.e. by the - # proportion of total variance they explain) - S = np.diag(s) - # PC = (s*V) - # PCs: - #print(U.shape) - #print(S.shape) - #print(V.shape) - #print(s[0], U[0,:]) - - #PC1 = (s[0] * U[:,0]) - #PC2 = (s[1] * U[:,1]) - #for i, p in enumerate(peaklist): - # p.pc1 = PC1[i] - # p.pc2 = PC2[i] - - #mu = peaks.mean(axis=0) - #fig, ax = plt.subplots() - #ax.scatter(xData, yData) - #for axis in U: - # start, end = mu, mu + sigma * axis - # ax.annotate( - # '', xy=end, xycoords='data', - # xytext=start, textcoords='data', - # arrowprops=dict(facecolor='red', width=2.0)) - #ax.set_aspect('equal') - #plt.show() - - - # if plot_steps: - # plt.scatter(PC1, PC2) - # plt.show() - - # PCData1 = (U[:,0]*M) - # PCData2 = (U[:,1]*M) - # plt.scatter(PCData1, PCData2) - # plt.show() - - #plt.scatter(U[:,0],U[:,1]) - #plt.show() - #print('done') - #return PC - - # if we use all of the PCs we can reconstruct the noisy signal perfectly - #Mhat = np.dot(U, np.dot(S, V.T)) - #print('Using all PCs, MSE = %.6G' %(np.mean((M - Mhat)**2))) - - #plt.show() - return S@U.T - -def gettime(x, samplerate, starttime): - startm = int(starttime[-2:]) - starth = int(starttime[:-2]) - seconds = x/samplerate - m, s = divmod(seconds, 60) - m = m + startm - h, m = divmod(m, 60) - h = h+starth - return "%d:%02d:%02d" % (h, m, s) - -def connect_blocks(oldblock): - newblock = Peaklist([]) - newblock.lastofclass = oldblock.lastofclass - newblock.lastofclassx = oldblock.lastofclassx - newblock.classesnearby = oldblock.classesnearby - newblock.classesnearbypccl = oldblock.classesnearbypccl - newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx] - newblock.classamount = oldblock.classamount - return newblock - ##print('classesnearbyx! 
old, new ' , oldblock_len,oldblock.classesnearbyx , newblock.classesnearbyx) - -if __name__ == '__main__': - main() - - - -# deleted Code, but unsure if really want to delete: - - #nix #print( b.data_arrays) - - # for cl in np.unique(cllist): - - # currentfish_x = x[:][cllist == cl] - # currentfish_y = y[:][cllist == cl] - # currentfish_h = x[:][cllist == cl] - - - #nix try: - #nix xpositions[cl] = b.create_data_array("f%d_eods" %cl, "spiketimes", data = currentfish_x) - #nix xpositions[cl].append_set_dimension() - #nix # thisfish_eods = b.create_multi_tag("f%d_eods_x"%cl, "eods.position", xpositions[cl]) - #nix # thisfish_eods.references.append(nixdata) - #nix except nix.pycore.exceptions.exceptions.DuplicateName: - #nix - #nix xpositions[cl].append(currentfish_x) - - - #thisfish_eods.create_feature(y, nix.LinkType.Indexed) - #b.create_multi_tag("f%d_eods_y"%cl, "eods.y", positions = y) - #b.create_multi_tag("f%d_eods_h"%cl, "eods.amplitude", positions = h) - #thisfish_eods.create_feature - - - - -# in analyseEods -# in analyseEods classlist = eods[3] #np.vectorize(lambda peak: peak.cl, otypes=[object])(worldpeaks.list) -# in analyseEods fishclass = {} -# in analyseEods #print('classlist: ', classlist) -# in analyseEods # #print('Classes at end: ', np.unique(classlist)) -# in analyseEods -# in analyseEods -# in analyseEods fishes = {} -# in analyseEods for num in np.unique(classlist): -# in analyseEods fishes[num] = eods[:,:][: , classlist == num] -# in analyseEods -# in analyseEods -# in analyseEods -# in analyseEods -# in analyseEods fishes = fill_hidden_3(fishes) # cl-dict : x y z -dict -# in analyseEods #maxlencl = max(fishes, key=lambda k: fishes[k]['x'][-1]-fishes[k]['x'][0]) -# in analyseEods -# in analyseEods fishes, weirdparts = fill_holes(fishes) -# in analyseEods fishes, weirdparts = fill_holes(fishes) -# in analyseEods -# in analyseEods for cl in np.unique(classlist): -# in analyseEods isi = [isi for isi in np.diff(fishes[cl]['x'])] -# in analyseEods fishes[cl][3]= isi -# in analyseEods - - -#npFish -#npFish npFishes = {} -#npFish fishfeaturecount = len(fishes[cl]) -#npFish for cl in np.unique(classlist): -#npFish npFishes[cl]= np.zeros([fishfeaturecount, len(fishes[cl]['x'])]) -#npFish for i, feature in enumerate(['x', 'y', 'h', 'isi']): #enumerate(fishes[cl]): -#npFish if feature == 'isi': -#npFish fishes[cl][feature].append(fishes[cl][feature][-1]) -#npFish # #print(feature, cl) -#npFish npFishes[cl][i] = np.array(fishes[cl][feature]) -#npFish # #print(npFishes[classlist[0]][0]) -#npFish # #print(npFishes[classlist[0]][2]) -#npFish # #print(npFishes[classlist[0]][3]) -#npFish #np.savetxt('worldpeaks_x_y_cl_2', (x,y,cl, isi), fmt="%s") -#npFish -#npFish np.set_printoptions(threshold=np.nan) -#npFish -#npFish for i, cl in enumerate(np.unique(classlist)): #Neue Klassennamen! -#npFish x = npFishes[cl][0] -#npFish y = npFishes[cl][1] -#npFish h = npFishes[cl][2] -#npFish isi = npFishes[cl][3] -#npFish -#npFish np.savetxt(filename[:-4]+'Fish_xyhisi_cl%d' % i, npFishes[cl], fmt="%s") -#npFish -#npFish -#npFish - - - - - - # / TODO: Peakclassifikator bei weit wegliegenden klassen? Done - # / TODO: Class2 implementation auf class linreg übertragen Done - Doof - # TODO: Klassen zusammenfuegen/ Noise zusammenfuegen - # - Wenn last 3 und first 3 zueinander passen in 1. Amplitude und 2. Periode (falls peaks) oder 2. randomzeugs? - Noiseerkennung und 2. 
Amplitude - # TODO: Klassen filtern auf Patternausreißer - # diff --git a/thunderfish/DextersThunderfishAddition/analyseDexRefactorShort.py b/thunderfish/DextersThunderfishAddition/analyseDexRefactorShort.py deleted file mode 100644 index 11acef89..00000000 --- a/thunderfish/DextersThunderfishAddition/analyseDexRefactorShort.py +++ /dev/null @@ -1,1995 +0,0 @@ -# Script to detect and classify EODs in recordings of weakly electric pulse -# fish, Dexter Früh, 2018 -# # it is suggested to save the recording in -# workingdirectory/recording/recording.WAV - -# results will be saved in workingdirectory/recording/ -# -# input: -# - [Recorded Timeseries] recording.WAV -# outputs(optional): -# - [Detected and Classified EODs] -# (Numpy Array with Shape (Number of EODs, 4 (Attributes of EODs)), -# with the EOD-Attributes -# - x-location of the EOD -# (time/x-coordinate/datapoint in recording) -# - y-location of the EOD -# (Amplitude of the positive peak of the pulse-EOD) -# - height of the EOD(largest distance between peak and through in the EOD) -# - class of the EOD -# eods_recording.npy -# - [plots of the results of each analyse step for each -# analysepart (timeinterval of length = deltat) of the recording] -# -# required command line arguments at function call -# - save : if True, save the results to a numpy file (possibly -# overwrite existing) -# - plot : if True, plot results in each analysestep -# - new : if True, do a new analysis of the recording, even if there -# is an existing analyzed .npy file with the right name. -# -# call with: -# python3 scriptname.py save plot new (starttime endtime[sec] for only -# partial analysis) -# -# other parameters are behind imports and some hardcoded at the relevant -# codestep -import sys -import numpy as np -import copy -from scipy.stats import gmean -from scipy import stats -from scipy import signal -from scipy import optimize -import matplotlib -from fish import ProgressFish -import matplotlib.pyplot as plt -from thunderfish.dataloader import open_data -from thunderfish.peakdetection import detect_peaks -from scipy.interpolate import interp1d -from scipy.signal import savgol_filter -from collections import deque -import ntpath -import nixio as nix -import time -import os -from shutil import copy2 - -from ownDataStructures import Peak, Tr, Peaklist -import DextersThunderfishAddition as dta - -from IPython import embed -# parameters for the analysis - -deltat = 30.0 # seconds of buffer size -thresh = 0.04 # minimal threshold for peakdetection -peakwidth = 20 # width of a peak and minimal distance between two EODs -# basic parameters for thunderfish.dataloader.open_data -verbose = 0 -channel = 0 -# timeinterval to analyze other than the whole recording -#starttime = 0 -#endtime = 0 -#timegiven = False - -def main(): # analyse_dex.py filename save plot new (optional starttime endtime [sec]) - home = os.path.expanduser('~') - os.chdir(home) - # defaults for optional arguments - timegiven = False - plot_steps = False - # parse command line arguments - filepath, save, plot, new (, starttime, - filepath = sys.argv[1] - save = int(sys.argv[2]) - plot_steps = int(sys.argv[3]) - new = int(sys.argv[4]) - if len(sys.argv[:])>5: - timegiven = True - starttime = int(sys.argv[5]) - endtime = int(sys.argv[6]) - #print(starttime, endtime) - peaks = np.array([]) - troughs = np.array([]) - cutsize = 20 - maxwidth = 50 #10 - ultimate_threshold = thresh+0.01 - filename = path_leaf(filepath) - proceed = input('Currently operates in home directory. 
If given a pulsefish recording filename.WAV, then a folder filename/ will be created in the home directory and all relevant files will be stored there. continue? [y/n] ').lower() - if proceed == 'n': - quit() - elif proceed == 'y': - pass - #do something - elif proceed != 'y': - quit() - datasavepath = filename[:-4] - print(datasavepath) - eods_len = 0 - ### ## starting analysis - if new == 1 or not os.path.exists(filename[:-4]+"/eods5_"+filename[:-3]+"npy"): - ### ## import data - with open_data(filepath, channel, deltat, 0.0, verbose) as data: - if save == 1 or save == 0: - if not os.path.exists(datasavepath): - os.makedirs(datasavepath) - copy2(filepath, datasavepath) - samplerate = data.samplerate - ### ## split datalength into smaller blocks - nblock = int(deltat*data.samplerate) - if timegiven == True: - parttime1 = starttime*samplerate - parttime2 = endtime*samplerate - data = data[parttime1:parttime2] - if len(data)%nblock != 0: - blockamount = len(data)//nblock + 1 - else: - blockamount = len(data)//nblock - bigblock = [] - ### ## output first (0%) progress bar - print('blockamount: ' , blockamount) - progress = 0 - print(progress, '%' , end = " ", flush = True) - fish = ProgressFish(total = blockamount) - olddatalen = 0 - startblock = 0 - ## iterating through the blocks, detecting peaks in each block - for idx in range(startblock, blockamount): - ### ## print progress - if progress < (idx*100 //blockamount): - progress = (idx*100)//blockamount - progressstr = 'Partstatus: '+ str(0) + ' '*2 + ' % (' + '0' + ' '*4+ '/' + '?'+' '*4+ '), Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - progressstr = 'Partstatus: '+ 'Part ' + '0'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - datx = data[idx*nblock:(idx+1)*nblock] - # ---------- analysis -------------------------------------------------------------------------- - # step1: detect peaks in timeseries - pk, tr = detect_peaks(datx, thresh) - troughs = tr - # continue with analysis only if multiple peaks are detected - if len(pk) > 2: - peaks = dta.makeeventlist(pk,tr,datx,peakwidth) - #dta.plot_events_on_data(peaks, datx) - peakindices, peakx, peakh = dta.discardnearbyevents(peaks[0],peaks[1],peakwidth) - peaks = peaks[:,peakindices] - progressstr = 'Partstatus: '+ 'Part ' + '1'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - if len(peaks) > 0: - ### ## connects the current part with the one that came before, to allow for a continuous analysis - if idx > startblock: - print('peaklist.len: ',peaklist.len) - peaklist = dta.connect_blocks(peaklist) - print(peaklist.len, peaklist.classesnearbyx) - else: - peaklist = Peaklist([]) - aligned_snips = dta.cut_snippets(datx,peaks[0], 15, int_met = "cubic", int_fact = 10,max_offset = 1.5) - progressstr = 'Partstatus: '+ 'Part ' + '2'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - # calculates principal components - pcs = dta.pc(aligned_snips)#pc_refactor(aligned_snips) - #print('dbscan') - order = 5 - minpeaks = 3 if deltat < 2 else 10 - labels = dta.cluster_events(pcs, peaks, order, 0.4, minpeaks, False, olddatalen, method = 'DBSCAN') - #print('peaks before align', peaks) - peaks = np.append(peaks,[labels], axis = 0) - #dta.plot_events_on_data(peaks, datx) - olddatalen = len(datx) - num = 1 - progressstr = 'Partstatus: '+ 'Part ' + '3'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - # classifies the peaks using the data from the clustered classes and a simple 
amplitude-walk which classifies peaks as different classes if their amplitude is too far from any other classes' last three peaks - #peaks[3]=[-1]*len(peaks[3]) - if idx > startblock: - dta.alignclusterlabels(labels, peaklist, peaks,data=datx) - print(peaklist.classesnearby) - peaks, peaklist = dta.ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude - print(peaklist.classesnearby) - #join_count=0 - # while True and joincc(peaklist, peaks) == True and join_count < 200: - # join_count += 1 - # continue - # discards all classes that contain less than mincl EODs - minlen = 6 # >=1 - peaks = dta.discard_short_classes(peaks, minlen) - if len(peaks[0]) > 0: - peaks = dta.discard_wave_pulses(peaks, datx) - # plots the data part and its detected and classified peaks - if plot_steps == True: - dta.plot_events_on_data(peaks, datx) - pass - # map the analyzed EODs of the buffer part to the whole - # recording - worldpeaks = np.copy(peaks) - # change peaks location in the buffered part to the location relative to the - peaklist.len = nblock - # peaklocations relative to whole recording - worldpeaks[0] = worldpeaks[0] + (idx*nblock) - thisblock_eods = np.delete(peaks,3,0) - thisblockeods_len = len(thisblock_eods[0]) - progressstr = 'Partstatus: '+ 'Part ' + '4'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - # save the peaks of the current buffered part to a numpy-memmap on the disk - if thisblockeods_len> 0 and save == 1 or save == 0: - if idx == 0: - eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='w+', shape=(4,thisblockeods_len), order = 'F') - dtypesize = 8#4 #float32 is 32bit = >4< bytes long ---changed to float64 -> 8bit - eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', offset = dtypesize*eods_len*4, shape=(4,thisblockeods_len), order = 'F') - eods[:] = thisblock_eods - eods_len += thisblockeods_len - # to clean the plt buffer... - plt.close() - # get and print the measured times of the algorithm parts for the - # current buffer - progressstr = 'Partstatus: '+ 'Part ' + '5'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - # plt.show() - # after the last buffered part has finished, save the memory mapped - # numpy file of the detected and classified EODs to a .npy file to the - # disk - eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', shape=(4,eods_len), order = 'F') - print('before final saving: print unique eodcl: ' , np.unique(eods[3])) - if save == 1: - # #print('eods', eods[3]) - path = filename[:-4]+"/" - if not os.path.exists(path): - os.makedirs(path) - if eods_len > 0: - print('Saved!') - np.save(filename[:-4]+"/eods8_"+filename[:-3]+"npy", eods) - else: - #np.save(filename[:-4]+"/eods5_"+filename[:-3]+"npy", thisblock_eods) - print('not saved') - - else: # if there already has been a certain existing result file and 'new' was set to False - print('already analyzed') - - - # not used data implementation using NIX - # Save Data - - # Needed: - # Meta: Starttime, Startdate, Length - # x, y, h, cl, difftonextinclass -> freq ? , - - # Later: Find "Nofish" - # Find "Twofish" - # Find "BadData" - # Find "Freqpeak" - # ? 
Find "Amppeak" - # - - # bigblock = np.array(bigblock) - # x=xarray(bigblock) - # y=yarray(bigblock) - # cl=clarray(bigblock) - - - #nix file = nix.File.open(file_name, nix.FileMode.ReadWrite) - #nix b = file.blocks[0] - #nix nixdata = b.data_arrays[0] - #nix cldata = [] - #nix #print(classes) - #nix #print(b.data_arrays) - #nix for i in range(len(np.unique(classes))): - #nix cldata.append(b.data_arrays[i+1]) - - - # for cl in - - # for cl in - # x = thisfish_eods - - - #nix file.close() - -def path_leaf(path): - ntpath.basename("a/b/c") - head, tail = ntpath.split(path) - return tail or ntpath.basename(head) - -def fill_hidden(fishclasses): - - fishes = fishclasses - - nohidefishes = {} - for cl in fishes: - x =[] - y = [] - h = [] - fish = fishes[cl] - # #print('fish', fish) - fishisi = calcisi(fish) - isi = fishisi[0] - for i, newisi in enumerate(fishisi): - leftpeak = fish[i] - x.append(leftpeak.x) - y.append(leftpeak.y) - h.append(leftpeak.height) - if newisi > 2.8*isi: - guessx = leftpeak.x + isi - - while guessx < leftpeak.x + newisi-0.8*isi: - - peakx = peakaround(guessx, isi*0.1, fishes) - if peakx is not None: - x.append(peakx) - y.append(leftpeak.y) - h.append(leftpeak.height) - guessx = peakx+ isi + (peakx-guessx) - - continue - break - isi = newisi - nohidefishes[cl]= {'x':x,'y':y,'h':h} - return nohidefishes - -def plotheights(peaklist): - heights = heightarray(peaklist) - x_locations = xarray(peaklist) - plt.scatter(x_locations, heights) - plt.show() - -def ploteods(eods, data): - plt.plot(range(len(data)),data, color = 'black') - classlist = eods[3] - cmap = plt.get_cmap('jet') - colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) - np.random.seed(22) - np.random.shuffle(colors) - colors = [colors[cl] for cl in np.unique(classlist)] - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) - x=0 - if len(classlist)>0: - # #print(classlist) - # #print('classes: ' , np.unique(classlist)) - from collections import Counter - count = Counter(classlist) - # #print('longest class: ', count.most_common()[0]) - for num, color in zip(np.unique(classlist), colors): - peaksofclass = eods[:,:][:, classlist == num] - #xpred = linreg_pattern(peaksofclass[0:3]) - #for p in peaksofclass[0:3]: - # #print(p.x) - ##print(xpred, peaksofclass[3].x) - - #if len(peaksofclass) > 1000: - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) - #else: - plt.plot(peaksofclass[0], peaksofclass[1], '.', color = color, ms =20) - plt.show() - -def fill_hidden_3(fishes): - - fishes = fishes - - nohidefishes = {} - for cl, fish in fishes.items(): - x =[] - y = [] - h = [] - # fish = fishes[cl] passt net, fishes is np.array mit (cl, (xyh)) - fishisi = np.diff(fish[0]) - isi = fishisi[0] - for i, newisi in enumerate(fishisi): - leftpeak = i - x.append(fish[0][i]) - y.append(fish[1][i]) - h.append(fish[2][i]) - # #print(cl, fish[0][i], isi, newisi) - if newisi > 2.8*isi: - guessx = fish[0][i] + isi - - while guessx < fish[0][i] + newisi-0.8*isi: - - peakx = peakaround3(guessx, isi*0.1, fishes) - if peakx is not None: - # #print(jup) - x.append(peakx) - y.append(fish[1][i]) - h.append(fish[2][i]) - guessx = peakx+ isi + (peakx-guessx) - - continue - break - isi = newisi - nohidefishes[cl]= {'x':x,'y':y,'h':h} - - return nohidefishes - -def peakaround2(guessx, interval, fishes): - found = False - for cl, fish in fishes.items(): - for px in fish['x']: - distold = interval - if px < guessx-interval: - continue - # #print('in area', guessx-interval) - if 
guessx-interval < px < guessx+interval: - found = True - dist = px-guessx - if abs(dist) < abs(distold): - distold = dist - if px > guessx+interval: - if found == True: - # #print(guessx, dist) - return guessx + dist - else: break - return None - -def peakaround3(guessx, interval, fishes): - found = False - for cl, fish in fishes.items(): - for px in fish[0]: - distold = interval - if px < guessx-interval: - continue - # #print('in area', guessx-interval) - if guessx-interval < px < guessx+interval: - found = True - dist = px-guessx - if abs(dist) < abs(distold): - distold = dist - if px > guessx+interval: - if found == True: - # #print(guessx, dist) - return guessx + dist - else: break - return None - -def peakaround(guessx, interval, fishes): - found = False - for cl, fish in fishes.items(): - for peak in fish: - - distold = interval - if peak.x < guessx-interval: - continue - # #print('in area') - if guessx-interval < peak.x < guessx+interval: - found = True - dist = peak.x-guessx - if abs(dist) < abs(distold): - distold = dist - if peak.x > guessx+interval: - if found == True: - # #print(guessx, dist) - return guessx + dist - else: break - return None - -def fill_holes(fishes): #returns peakx, peaky, peakheight # Fills holes that seem to be missed peaks in peakarray with fake (X/Y/height)-Peaks - retur = {} - lost = {} - for cl, fish in fishes.items(): - fishisi = np.diff(fish['x']) - mark = np.zeros_like(fishisi) - isi = 0 - ##print('mark', mark) - # #print('fishisi' , fishisi) - #find zigzag: - c=0 - c0= 0 - n=0 - for i, newisi in enumerate(fishisi): - if abs(newisi - isi)>0.15*isi: - if (newisi > isi) != (fishisi[i-1] > isi): - c+=1 - # #print(abs(newisi - isi), 'x = ', fish[i].x) - c0+=1 - elif c > 0: - n += 1 - if n == 6: - if c > 6: - # print ('zigzag x = ', fish['x'][i-6-c0], fish['x'][i-6]) - mark[i-6-c0:i-6]= -5 - c = 0 - c0=0 - n = 0 - - #if c > 0: - # #print(i, c) - # if c == 6: - # #print('zigzag!') - isi = newisi - isi = 0 - for i, newisi in enumerate(fishisi): - ##print('mark: ' , mark) - if mark[i] == -5: continue - if i+2 >= len(fishisi): - continue - if (2.2*isi > newisi > 1.8*isi) and (1.5*isi>fishisi[i+1] > 0.5*isi) : - mark[i] = 1 - isi = newisi - # #print('found 1!' 
, i) - elif (2.2*isi > newisi > 1.8*isi) and (2.2*isi> fishisi[i+1] > 1.8*isi) and (1.5*isi > fishisi[i+2] > 0.5*isi): - mark[i] = 1 - isi = isi - elif 3.4*isi > newisi > 2.6*isi and 1.5*isi > fishisi[i+1] > 0.5*isi: - mark[i] = 2 - - elif (0.6* isi > newisi > 0): - # #print('-1 found', i ) - if mark[i] ==0 and mark[i+1] ==0 and mark[i-1]==0 : - # isi = newisi - # continue - # #print('was not already set') - if fishisi[i-2] > isi < fishisi[i+1]: - mark[i] = -1 - # #print('-1') - elif isi > fishisi[i+1] < fishisi[i+2]: - mark[i+1] = -1 - # #print('-1') - isi = newisi - filldpeaks = [] - x = [] - y = [] - h = [] - x_lost=[] - y_lost=[] - h_lost=[] - # #print('filledmarks: ', mark) - for i, m in enumerate(mark): - if m == -1 : - # #print('-1 at x = ', fish['x'][i]) - continue - if m == -5: - x_lost.append(fish['x'][i]) - y_lost.append(fish['y'][i]) - h_lost.append(fish['h'][i]) - x.append(fish['x'][i]) - y.append(fish['y'][i]) - h.append(fish['h'][i]) - continue - x.append(fish['x'][i]) - y.append(fish['y'][i]) - h.append(fish['h'][i]) - if m == 1: - # #print('hofly added peak at x = ' , fish['x'][i]) - x.append(fish['x'][i] + fishisi[i-1]) - y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) - h.append(0.5*(fish['h'][i]+fish['h'][i+1])) - elif m== 2: - x.append(fish['x'][i] + fishisi[i]) - y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) - h.append(0.5*(fish['h'][i]+fish['h'][i+2])) - x.append(fish['x'][i] + 2*fishisi[i-1]) - y.append( 0.5*(fish['y'][i]+fish['y'][i+2])) - h.append(0.5*(fish['h'][i]+fish['h'][i+2])) - # #print('added at x = ', fish['x'][i] + fishisi[i]) - retur[cl] = {'x':x,'y':y,'h':h} - lost[cl] = {'xlost':x_lost,'ylost':y_lost,'hlost':h_lost} - # filledpeaks =np.array(filledpeaks) - # #print(filledpeaks.shape) - # filledpeaks. - return retur, lost - -def calc_tsh_noise(peaks, data): - heights = np.vectorize(lambda peak: peak.height)(peaks) - # peakx = xarray(peaks) - # peakxlist = peakx.tolist() - # #print('datenstdanfang: ', np.std(data)) - # datatsh = np.mean(np.abs(data))# - # datatsh = 2* np.std(data) - # peakareas = [i for x in peakx for i in range(x-10, x+10) if (i < len(data))] - # peakareas = np.arange(peakx-10, peakx+10, 1) - # relevantdata = [] - #peakareas = np.unique(peakareas) - # #print(len(peakareas), len(data), ' len peakarea and data' , datatsh) - #relevantdata is the data without the areas around the peaks, to calculate the standard deviation of the noise - #c = 0 - tsh = 0.1*np.std(heights) - - #for i, dat in enumerate(data): - # if peakareas[c] == i and c dist: - # dist = tdist - #print('dist', dist) - if dist>=0: - valid = True - if olddatalen > 0: - alignlabels(labels, peaks, olddatalen) - for i, p in enumerate(peaklist): - pcclasses[peaknum] = labels[i] - return valid - if takekm: - km = KMeans(n_clusters=3, n_init = 3, init = 'random', tol=1e-5, random_state=170, verbose = True).fit(X) - core_samples_mask = np.zeros_like(km.labels_, dtype=bool) - labels = km.labels_ - if takekm: - for i, p in enumerate(peaklist): - # print('label ', labels[i]) - pcclasses[peaknum] = p.pccl - # Number of clusters in labels, ignoring noise if present. - n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) - #print('Estimated number of clusters: %d' % n_clusters_) - # ############################################################################# - # Plot result - # Black removed and is used for noise instead. 
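The cluster counting above treats DBSCAN's label -1 as noise rather than as a class of its own. A minimal sketch of that step, with eps and min_samples mirroring the 0.4 and 10 that the calling script passes to cluster_events, could look like this (an illustrative stand-in, not the deleted implementation):

    import numpy as np
    from sklearn.cluster import DBSCAN

    def count_clusters(X, eps=0.4, min_samples=10):
        # X: (n_peaks, n_components) array of principal-component scores
        labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(X)
        # -1 marks noise points and is not counted as a cluster
        n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
        return labels, n_clusters

    # usage with random data standing in for real PC scores:
    labels, n = count_clusters(np.random.randn(200, 3))
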
- unique_labels = set(labels) - colors = [plt.cm.Spectral(each) - for each in np.linspace(0, 1, len(unique_labels))] - fig = plt.figure() - ax = fig.add_subplot(111, projection = '3d') - for k, col in zip(unique_labels, colors): - if k == -1: - # Black used for noise. - col = [0, 0, 0, 1] - class_member_mask = (labels == k) - xy = X[class_member_mask] - # print(col) - ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), - markeredgecolor='k', markersize=14) - ax.set_title('Estimated number of clusters: %d' % n_clusters_) - #plt.show() - - - from sklearn.neighbors import kneighbors_graph - knn_graph = kneighbors_graph(X, 15, include_self=False) - ac = AgglomerativeClustering(linkage = 'complete', n_clusters = 3, connectivity = knn_graph).fit(X) - core_samples_mask = np.zeros_like(ac.labels_, dtype=bool) - labels = ac.labels_ - if takekm: - for i, p in enumerate(peaklist): - print('label ', labels[i]) - pcclasses[peaknum] = labels[i] - # Number of clusters in labels, ignoring noise if present. - n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) - #print('Estimated number of clusters: %d' % n_clusters_) - # ############################################################################# - # Plot result - # Black removed and is used for noise instead. - unique_labels = set(labels) - colors = [plt.cm.Spectral(each) - for each in np.linspace(0, 1, len(unique_labels))] - fig = plt.figure() - ax = fig.add_subplot(111, projection = '3d') - for k, col in zip(unique_labels, colors): - if k == -1: - # Black used for noise. - col = [0, 0, 0, 1] - class_member_mask = (labels == k) - xy = X[class_member_mask] - print(col) - ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), - markeredgecolor='k', markersize=14) - ax.set_title('Estimated number of clusters: %d' % n_clusters_) - #plt.show() - -def ampwalkclassify3_refactor(peaks,peaklist): # final classificator - classamount = peaklist.classamount - lastofclass = peaklist.lastofclass # dict of a lists of the last few heightvalues of a class, f.E ((1,[0.7,0.68,0.71]), (5, [0.2, 0.21, 0.21])) - lastofclassx = peaklist.lastofclassx # dict of a list of the last few x-values of a class - a=0 - elem = 0 - thresholder = [] - comperr = 1 - classesnearby = peaklist.classesnearby # list of the classes of the last n peaks f.E:[1,2,1,2,1,3,2,1,...] - classesnearbyx = peaklist.classesnearbyx # list of the x-values of the last n peaks, f.E:[13300, 13460, 13587, 13690, 13701, ...] 
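The comments above describe the bookkeeping of the amplitude walk: each class keeps the heights and x-positions of its last few peaks so that a new peak can be compared against a recent class mean. A compact sketch of that bookkeeping, using bounded deques instead of the manual popping done further down (illustrative only; remember() is a hypothetical helper):

    from collections import deque

    lastofclass = {}    # class id -> recent peak heights
    lastofclassx = {}   # class id -> recent peak x-positions

    def remember(cl, height, x, maxlen=3):
        # keep only the last `maxlen` peaks per class, as the classifier
        # below does by popping once three entries are stored
        lastofclass.setdefault(cl, deque(maxlen=maxlen)).append(height)
        lastofclassx.setdefault(cl, deque(maxlen=maxlen)).append(x)
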
- classesnearbypccl = peaklist.classesnearbypccl # list of the pc-classified classes of the last n peaks - classes = np.zeros((len(peaks[0]))) - if len(peaks) >3: - pcclasses = peaks[3] - print('ERROOR') - positions = peaks[0] - heights = peaks[1] - - # #print('nearbyclasses at start:' ,classesnearby, classesnearbyx) - # for peak in peaks: - # peak.cl = peak.pccl+2 - # peaklist.classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) - # return peaks - cl = 0 - maxdistance = 30000 # Max distance to possibly belong to the same class - factor = 1.6 # factor by which a peak fits into a class, f.E: classheight = 1 , factor = 2 => peaks accepted in range (0.5,2) - c=0 - peakamount = len(peaks.T) - #fish = ProgressFish(total = peakamount) - for peaknum, p in enumerate(peaks.T): - perc = str((peaknum*100)//peakamount) - # fish.animate(amount = "", dexextra = 'Partstatus: '+ ' '*(3-len(perc)) +perc + ' % (' + ' '*(4-len(str(peaknum)))+str(peaknum) + '/' + ' ' *(4-len(str(peakamount)))+str(peakamount) + '), Filestatus:') - awc_btime = [] - if len(lastofclass) == 0: # Dict with all classes, containing the heights of the last few peaks - lastofclass[1] = deque() - lastofclassx[1]= deque() - lastofclass[1].append(heights[peaknum]) - lastofclassx[1].append(positions[peaknum]) - classesnearby.append(1) - classesnearbyx.append(-1) - classesnearbypccl.append(pcclasses[peaknum]) - classes[peaknum] = 1 - classamount += 1 - continue - time1 = time.time() - for i, cl in enumerate(classesnearby): - if (positions[peaknum]-classesnearbyx[i]) > maxdistance: - classesnearby.pop(i) - classesnearbyx.pop(i) - classesnearbypccl.pop(i) - lastofclassisis = [] - for i in classesnearby: - # print(i, classesnearby) - lastofclassisis.append(np.median(np.diff(lastofclassx[i]))) - meanisi = np.mean(lastofclassisis) - if 32000 > 20*meanisi> 6000: - maxdistance = 20*meanisi - #print(meanisi, maxdistance , 'maxdistance ----------------------------------------------------------------------------------------------') - time2 = time.time() - awc_btime.append(time2-time1) #0 - cl = 0 # 'No class' - comperr = 1 - ##print('classesnearby at a peak', classesnearby) - clnrby = np.unique(classesnearby) - time1 = time.time() -# classmean = 0 - # if pcclasses[peaknum] == -1: - # factor = 1.2 - # else: - # factor = 1.6 - for i in clnrby: - #print('cl: ', i) - # if classesnearbypccl[classesnearby.index(i)] == -1: - # factor = 2.2 - # else: factor = 1.6 - classmean = np.mean(lastofclass[i]) - logerror = np.abs(np.log2(heights[peaknum])-np.log2(classmean)) - abserror = np.abs(heights[peaknum]-classmean) - logthresh = np.log2(factor) - #ä#print(np.std(lastofclass[i])) absthresh = 0.5*classmean # #print('test log', np.abs(np.log2(np.array([0.4,0.5,1,1.5,2,2.4]))-np.log2(np.array([1,1,1,1,1,1]))) ) # abs(classmean*0.5) - #relerror = error - relerror = logerror - #relabserror = abserror/thresh - # if 1140 < p.num < 1150: - # print(p.num) - # print('for classes at one peak: classmean, height, abserror, thresh', - # classmean,heights[peaknum], logerror, logthresh) - #print(len(classesnearbypccl), len(classesnearby)) - #print(classmean, heights[peaknum], logerror, logthresh, pcclasses[peaknum], classesnearbypccl[classesnearby.index(i)]) - if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum] or pcclasses[peaknum] == -1:# or - if logerror < logthresh: ## SameClass-Condition - if relerror < comperr and (positions[peaknum]-classesnearbyx[classesnearby.index(i)]) 2*compareisierror: -# cl = holdlastcl - - time2 = 
time.time() - awc_btime.append(time2-time1) #1 - time1 = time.time() - if pcclasses[peaknum] != -1: - if cl != 0 : - #print(cl) - if len(lastofclass[cl]) >= 3: - lastofclass[cl].popleft() - if len(lastofclassx[cl]) >= 3: - lastofclassx[cl].popleft() - lastofclass[cl].append(heights[peaknum]) - lastofclassx[cl].append(positions[peaknum]) - classes[peaknum] = cl - else: # Add new class - cl = classamount+1 - #print('existingclasses: ', classamount) - classamount = cl - #print('newclass: ----------------------------------------------------------------', cl) - lastofclass[cl] = deque() - lastofclassx[cl] = deque() - lastofclass[cl].append(heights[peaknum]) - lastofclassx[cl].append(positions[peaknum]) - classes[peaknum] = cl - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - ##print('tatsaechlich: ', cl) - if len(classesnearby) >= 12: #kacke implementiert? - minind = classesnearbyx.index(min(classesnearbyx)) - del lastofclass[classesnearby[minind]] - del lastofclassx[classesnearby[minind]] - #print(classesnearby[minind], 'del') - classesnearby.pop(minind) - classesnearbyx.pop(minind) - classesnearbypccl.pop(minind) - # for ind, clnrby in enumerate(reversed(classesnearby)): - # classesnearbyx - # del lastofclass[classesnearby[ind]] - # # del lastofclassx[classesnearby[minind]] - # classesnearby.pop(minind) - # classesnearbyx.pop(minind) - try: - ind=classesnearby.index(cl) - classesnearbyx[ind] = positions[peaknum] - # #print(ind ,' --------------------------------------here -----------------------------') - except ValueError: - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - else: - if cl != 0: - classes[peaknum] = cl - else: - cl = classamount+1 - #print('existingclasses: ', classamount) - classamount = cl - #print('newclass: ', cl) - lastofclass[cl] = deque() - lastofclassx[cl] = deque() - lastofclass[cl].append(heights[peaknum]) - lastofclassx[cl].append(positions[peaknum]) - classes[peaknum] = cl - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - if len(classesnearby) >= 12: #kacke implementiert? 
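The same-class condition used above compares log2-amplitudes against log2(factor); as the comment notes, with factor = 2 a class of mean height 1 accepts peaks with heights between 0.5 and 2. A small self-contained sketch of that test (fits_class is a hypothetical name, not part of the module):

    import numpy as np

    def fits_class(height, classmean, factor=1.6):
        # same-class condition: |log2(height) - log2(classmean)| < log2(factor),
        # i.e. classmean/factor < height < classmean*factor
        return abs(np.log2(height) - np.log2(classmean)) < np.log2(factor)

    # with factor = 2 a class of mean height 1 accepts heights in (0.5, 2):
    assert fits_class(1.4, 1.0, factor=2.0)
    assert not fits_class(2.2, 1.0, factor=2.0)
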
- minind = classesnearbyx.index(min(classesnearbyx)) - del lastofclass[classesnearby[minind]] - del lastofclassx[classesnearby[minind]] - #print(classesnearby[minind], 'del') - classesnearby.pop(minind) - classesnearbyx.pop(minind) - classesnearbypccl.pop(minind) - # for ind, clnrby in enumerate(reversed(classesnearby)): - # classesnearbyx - # del lastofclass[classesnearby[ind]] - # # del lastofclassx[classesnearby[minind]] - # classesnearby.pop(minind) - # classesnearbyx.pop(minind) - try: - ind=classesnearby.index(cl) - classesnearbyx[ind] = positions[peaknum] - # #print(ind ,' --------------------------------------here -----------------------------') - except ValueError: - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - time2 = time.time() - # awc_btime.append(time2-time1) #2 - # classesnearby = [cls for cls in classesnearby if cls != False] - # classesnearbyx = [clx for clx in classesnearbyx if clx != False] - # - # - #print('awc_btime ', awc_btime , ' newpeak-------------------------------------------------------- :') - peaklist.lastofclass = lastofclass - peaklist.lastofclassx = lastofclassx - peaklist.classesnearby = classesnearby - peaklist.classesnearbyx = classesnearbyx - peaklist.classlist = classes # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) - peaklist.classamount = classamount - peaks = np.append(peaks,classes[None,:], axis = 0) - return peaks, peaklist - -def joincc(peaklist,peaks): - # connects classes that appear after each other... - # peaklist = peaks.list - joinedsome = False - classlist = peaks[4] - peaksofclass = {} - last = [] - connect = {} #connect classes in connect+ - classcount = dict.fromkeys(classlist, 0) - ##print(classcount) - #classcount = [0]*len(np.unique(classlist)) - # #print(np.unique(classlist)) - for cl in np.unique(classlist): - peaksofclass[cl]= peaks[:,classlist == cl] - for i in range(len(peaks[0])): # i is the increasing index of the peaks - p = peaks[:,i] - poc = peaksofclass[p[4]] - classcount[p[4]]+=1 - countclass = p[4] #the current class before it might be changed to the connected class - if p[4] in connect: - p[4] = connect[p[4]] #peakclass is changed to connected class - # #print('changed ', countclass, 'to', p.cl) - joinedsome = True - - if len(poc) == classcount[countclass]: #the current peak is last peak of its class - last = poc[-len(poc) if len(poc) <= 5 else 5:] #the last peaks of the class - # #print('last: ', last) - #mean_last = np.mean(np.vectorize(lambda peak: peak[2])(last)) - mean_last = np.mean(last[2,:]) - nextfirst = {} # the first peaks of the next coming class(es) - # #print('class: ', countclass, 'at x = ', p.x, 'mean_last: ', mean_last) - for nexti in range(20): # the next 10 peaks are considered if they belong to the same classe - if i + nexti >= len(peaks[0]): break - inextp = peaks[:,i+nexti] - if classcount[inextp[4]] == 0: #current peak is first peak of its class - # #print('found a new begin! 
its class:' , inextp.cl) - ponc = peaksofclass[inextp[4]] # - nextfirst[inextp[4]] = ponc[0:len(ponc) if len(ponc) <= 5 else 5] - # #print(np.mean(np.vectorize(lambda peak: peak.height)(nextfirst[inextp.cl]))) - # #print(nextfirst) - compare = 1 - c = 0 - nextclass = -1 - for nextcl, first in nextfirst.items(): - mean_nextfirst = np.mean(first[2,:])#np.mean(np.vectorize(lambda peak: peak.height)(first)) - # #print(mean_nextfirst) - error = abs(mean_nextfirst - mean_last)/(mean_nextfirst) - if error < 1: - if compare < error: - continue - compare = error - if nextcl in connect: #if the peak that ist considered belongs to a class, that is already supposed to be connected to the current class - pocc = peaksofclass[connect[nextcl]] #peaks of the currently supposed connected class - if ( abs(mean_nextfirst - np.mean(pocc[-len(pocc) if -len(pocc) <= 5 else 5:][2])) - < abs(mean_nextfirst - mean_last) ): - continue - nextclass = nextcl - if nextclass != -1: - connect[nextclass] = p[4] - # #print('connect ', p.cl , ' and ', nextcl) - for cl in peaklist.classesnearby: - if cl in connect: - # #print('cl, connect', cl, connect[cl]) - peaklist.classesnearby[peaklist.classesnearby.index(cl)] = connect[cl] - peaklist.lastofclass[connect[cl]]=peaklist.lastofclass[cl] - peaklist.lastofclassx[connect[cl]]= peaklist.lastofclassx[cl] - peaklist.classlist = peaks[4] - return joinedsome - # for poc in peaksofclass: - # if len(poc) >= 3: - # newlast = poc[-3:] - # first = poc[:3] - # else: - # newlast = poc[-len(poc):] - # first = poc[:len(poc)] - # if last != []: - # if abs(np.mean(first) - np.mean(last)) < 0: - # #print('oh') - -def discardwaves_refactor(peaks, data): - deleteclasses = [] - for cl in np.unique(peaks[3]): - peaksofclass = peaks[:,peaks[3] == cl] - isi = np.diff(peaksofclass[0]) - isi_mean = np.mean(isi) - # #print('isismean',isi_mean) - widepeaks = 0 - # #print('width',peaksofclass[2].width) - isi_tenth_area = lambda x, isi:np.arange(np.floor(x-0.1*isi),np.ceil(x+0.1*isi),1, dtype = np.int) - for p in peaksofclass.T: - data = np.array(data) - try: - for dp_around in data[isi_tenth_area(p[0],isi_mean)]:#np.floor(p[0]-0.1*isi_mean), np.ceil(p[0]+0.1*isi_mean),1)]:# - if dp_around <= p[1]-p[2]: - break - except IndexError: - pass - else: - widepeaks+=1 - ## p.isreal_pleateaupeaks() - if widepeaks > len(peaksofclass)*0.5: - deleteclasses.append(cl) - for cl in deleteclasses: - peaks = peaks[:,peaks[3]!=cl] - return peaks - -def smallclassdiscard(peaks, mincl): - classlist = peaks[3] - smallclasses = [cl for cl in np.unique(classlist) if len(classlist[classlist - == cl]) < - mincl] - delete = np.zeros(len(classlist)) - for cl in smallclasses: - delete[classlist == cl] == 1 - peaks = peaks[:,delete != 1] - return peaks - -def makepeak(data_x,cutsize, maxwidth, peakx, ltr, data_ltr, rtr, data_rtr, num, minhlr): - #if len(data) > peakx + cutsize/2: - return Peak(peakx, data_x, maketr(data_ltr, ltr), maketr(data_rtr, rtr), maxwidth, num, minhlr)#data[peakx-cutsize/2:peakx+cutsize/2], num) - #else: - # return Peak(peakx, data[peakx], - # maketr(data, ltr), - # maketr(data, rtr), - # maxwidth, - # #data[peakx-cutsize/2:-1], - # num) - -def maketr(data_x, x): - if x is not None: - return Tr(x,data_x) - else: - return None - -def makepeaklist(pkfirst, data, pk, tr, cutsize, maxwidth): - peaklist = np.empty([len(pk)], dtype = Peak) - trtopk = pkfirst - pktotr = 1-pkfirst - trlen = len(tr) - pklen = len(pk) - minhlr = lambda i, mwl, mwr : min( - abs( data[pk[i]] - min( data[pk[i]-mwl:pk[i]] ) if 
len(data[pk[i]-mwl:pk[i]]) > 0 else 0 ) - , - abs( data[pk[i]]- min( - data[pk[i]:pk[i]+mwr] ) if len(data[pk[i]:pk[i]+mwr]) > 0 else 0 ) - ) - #print(min( data[pk[0]-0:pk[2]]) ) - - if pktotr == 0: - peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], None, None, tr[pktotr], data[pktotr], 0, minhlr(0, 0, maxwidth)) - else: - peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], - tr[-trtopk], - data[-trtopk], tr[pktotr], data[pktotr], - 0, minhlr(0, min(maxwidth, - pk[0]-tr[-trtopk]) , maxwidth)) - for i in range(1,pklen-1): - peaklist[i] = makepeak(data[pk[i]], cutsize, maxwidth, pk[i], tr[i-trtopk], data[tr[i-trtopk]], tr[i+pktotr],data[tr[i+pktotr]], i, minhlr(i, maxwidth, maxwidth)) - if pktotr == 0 and pklen <= trlen: - peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1], data[pklen-trtopk-1], tr[pklen+pktotr-1], data[pklen+pktotr-1], i, minhlr(pklen-1, maxwidth, min(maxwidth, tr[pklen+pktotr-1]-pk[pklen-1]))) - else: - peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1],data[pklen-trtopk-1], None, None, pklen-1, minhlr(pklen-1, maxwidth, 0)) - return peaklist - -#def doublepeaks(peaks, peakwidth): -# dif2 = peaks[1].x-peaks[0].x -# if dif2 > 5* peakwidth: -# peaks[0].real = False -# for i in range(1,len(peaks)-1): -# dif1 = dif2 -# dif2 = peaks[i+1].x-peaks[i].x -# if dif1 > 5* peakwidth and dif2 > 5* peakwidth: -# peaks[i].real = False -# if dif2 > 5* peakwidth: -# peaks[len(peaks)-1] = False -# return peaks - -def discardunrealpeaks(peaklist): - peaks = peaklist[:][np.vectorize(lambda peak: peak.real, otypes=[object])(peaklist) == True] - for i, p in enumerate(peaks): - pass - # p.num = i - return peaks - -def discardnearbypeaks(peaks, peakwidth): - peaksx = xarray(peaks) - pkdiff = np.diff(peaksx) - # peakwidth = avg_peakwidth(pknum,tr) - pknumdel= np.empty(len(peaksx)) - pknumdel.fill(False) -# peaksy = yarray(peaks) - peaksh = heightarray(peaks) - for i,diff in enumerate(pkdiff): - # #print(peaks[i].height) - if diff < peakwidth: #* peaks[i].height: ### Trial Error - if peaksh[i+1] > 1.01 *peaksh[i] : - pknumdel[i] = True - else: - # print(peaksh[i],peaksh[i+1]) - pknumdel[i+1] = True - peaks = peaks[pknumdel!=True] - for i, p in enumerate(peaks): - p.num = i - return peaks - -def interpol(data, kind): - #kind = 'linear' , 'cubic' - width = len(data) - x = np.linspace(0, width-1, num = width, endpoint = True) - return interp1d(x, data[0:width], kind , assume_sorted=True) - -def cutcenter(peak): - p = peak - cut = p.cut - pl=p.distancetoltr - pr=p.distancetortr - if pl is None: - pl = 10 - tx = p.x-10 - else: tx = p.ltr.x - if pr is None: - pr = 10 - if pl < p.maxwidth and pr > 1: - - width=len(cut) - # #print('distancetoltr',pl) - peakshape = cut - interpolfreq = 1 - xnew = np.linspace(0,len(peakshape)-1, len(peakshape)*interpolfreq, endpoint= True) - curvyf = interpol(peakshape) - curvy= curvyf(xnew) - #px = p.cutsize/2 * 4 - #left = px - (5*4) - #plt.plot(xnew, curvy) - #x_0 = optimize.fsolve(curvyf, 1.0) - # f = interp1d(x, y) - # f2 = interp1d(range(width), data[x:x+width], kind='cubic') - ##xnew = np.linspace(0, width-1, num = width*4, endpoint = True) - ##print(xnew) - # plt.plot(xnew,f2(xnew)) - ##print("show") - #plt.show - trx = (p.cutsize/2 - (p.x - tx) ) - if trx >0 : - xstart = trx - else: - xstart = 0 - # #print('pkx: ', p.x, 'ltrx: ', p.ltr.x) - # #print('trx in intpol', x) - x = xstart - if curvyf(x) < 0: - left = 0 - right= 0 - while(x < width-1 and curvyf(x) < 
0) : - left = x - # #print(curvyf(x)) - x+=0.25 - right = x - # #print('x: ', x , 'left, right: ', curvyf(left), curvyf(right)) - x = left+(1-curvyf(right)/(curvyf(right)-curvyf(left)))*1/interpolfreq - # #print(x) - else: - x = 0 - # #print(x_int) - # plt.scatter(xstart, curvyf(xstart), marker = 'x', s=150, zorder=2, linewidth=2, color='red') - # plt.scatter(x, curvyf(x), marker='x', s=150, zorder=2, linewidth=2, color='black') - # plt.show - # #print(x_int) - #p.relcutcenter = (p.ltr.x + x_int)-p.x - ##print('cent',p.relcutcenter) - #return (p.ltr.x + x_int)-p.x - - # while(data[x]>0) - else: - x= 0 - - return x - -def relcutarray(peaks): - return np.vectorize(lambda peak: peak.relcutcenter)(peaks) - -def xarray(peaks): - if len(peaks)>0: - peakx = np.vectorize(lambda peak: peak.x)(peaks) - return peakx - else: return [] - -def yarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.y)(peaks) - else: return [] - -def heightarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.height)(peaks) - else: return [] - -def clarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.cl)(peaks) - else: return [] -def pcclarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.pccl)(peaks) - else: return [] - -def peakxarray( ): - peakx = np.empty([len]) - peakx = np.vectorize(lambda peak: peak.x)(peaks) - return peakx - -def peakyarray( ): - peaky= np.empty([len]) - return np.vectorize(lambda peak: peak.y)(peaks) - - -def classify( ): - #template = peaks[0] - meanfit = np.mean(np.vectorize(fit, otypes=[object])(template,peaks)) - for p in peaks: - if fit(template,p) < meanfit: - # #print('classified ', fit(template,p) , ' meanfit: ' , meanfit) - p.currentclass = 1 - -def classifyhiker(template, peaks): - meanfit = np.mean(np.vectorize(fitinterpol2, otypes=[object])(template,peaks)) - #toclassify = peaks.tolist() - firstnot = 0 - for c in range(1,5): - first = True - template = peaks[firstnot] - for i, p in enumerate(peaks[firstnot:]): - if p.currentclass == 0: - if fitinterpol2(template,p) < meanfit: - # #print('peak number ' , i, 'classified as ', c, fit(template,p) , ' meanfit: ' , meanfit) - p.currentclass = c - template = p - elif first == True: - # #print('peak number ' , i, 'classified as First! 
', c, fit(template,p) , ' meanfit: ' , meanfit) - firstnot = i - first = False - else: - None - ##print('peak number ' , i, 'classified as not classified!', fit(template,p) , ' meanfit: ' , meanfit) - return peaks - - - # def Templatefitnext( , number, templnum): - # for p in peaks: - # if fit(peaks[templnum], p) < fitparameter: - -def cut_snippets(data, peaklist, rnge): - snippets = [] - positions = xarray(peaklist) - heights = heightarray(peaklist) - for pos in positions: - snippets.append(data[(pos+rnge[0]):(pos+rnge[1])]) - scaledsnips = np.empty_like(snippets) - for i, snip in enumerate(snippets): - top = -rnge[0] - # plt.plot(snip) - scaledsnips[i] = snip * 1/heights[i] - #plt.plot(scaledsnips[i]) - # print('plted') -# plt.show() - #print('1') - alignedsnips = np.empty((len(snippets), (rnge[1]-rnge[0])*10-30-10)) - standardized = np.empty((len(snippets), (rnge[1]-rnge[0])*10-10)) - intfact = 10 - for i, snip in enumerate(scaledsnips): - if len(snip) < ((rnge[1]-rnge[0])): - if i == 0: - snip =np.concatenate([np.zeros([((rnge[1]-rnge[0]) - len(snip))]),np.array(snip)]) - if i == len(scaledsnips): - snip = np.concatenate([snip, np.zeros([((rnge[1]-rnge[0])-len(snip))])]) - else: - # print('this') - snip = np.zeros([(rnge[1]-rnge[0])]) - interpoled_snip = dta.interpol(snip, 'cubic')(np.arange(0, len(snip)-1, 1/intfact)) if len(snip) > 0 else np.zeros([(rnge[1]-rnge[0]-1)*intfact ]) #interpolfactor 10 - - intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) - if intsnipheight == 0: - intsnipheight = 1 - interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight - standardized[i] = interpoled_snip - #print('2') - mean = np.mean(standardized, axis = 0) - #plt.plot(mean) -# plt.show() - #plt.plot(mean[10*-rnge[0]-10*5:-10*rnge[1]+21]) -# plt.show() - meantop = np.argmax(mean) - for i, snip in enumerate(standardized): - #plt.show() - interpoled_snip = snip #standardized[i] - cc = dta.crosscorrelation(interpoled_snip[15:-15], mean) - #cc = crosscorrelation(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31], mean[10*-rnge[0]-10*7:-10*rnge[1]+31]) - #plt.plot(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31]) - #top = np.argmax(interpoled_snip) - #offset = meantop - top - #if not(-15 <= offset <= 15): offset = 0 - offset = -15 + np.argmax(cc) - interpoled_snip = interpoled_snip[15-offset:-15-offset] if offset != -15 else interpoled_snip[30:] - #print(offset) - #plt.plot(interpoled_snip) - if len(interpoled_snip[~np.isnan(interpoled_snip)])>0: - alignedsnips[i] = interpoled_snip - #plt.show() - # print('3') - return snippets, alignedsnips - - -#def alignclusterlabels(labels, peaklist, peaks, olddatalen): -# overlapamount = len(peaks[:,peaks[0]<30000]) -# if overlapamount == 0: -# return None -# overlappeaks = copy.deepcopy(peaks[:overlapamount]) -# if len(peaks) > 3: -# print('wieso hat peaks eine pcclklasse?') -# overlappeaks = np.append(overlappeaks,[labels], axis = 0) -# overlap_peaklist = connect_blocks(old_peaklist) -# overlap_peaklist.classesnearbypccl = [-1]*len(overlap_peaklist.classesnearbypccl) -# classified_overlap = dta.ampwalkclassify3_refactor(overlappeaks,overlap_peaklist) -# -# labeltranslator = {} -# for cl in np.unique(classified_overlap[3]): -# if len(labeltranslator) <= len(np.unique(labels)): -# labelindex = np.where(classified_overlap[3] == cl)[0] -# label = labels[labelindex] -# labelindex = labelindex[np.where(label == stats.mode(label)[0])[0][0]] -# newlabel = labels[labelindex] -# try: -# oldlabel = 
old_peaklist.classesnearbypccl[::-1][old_peaklist.classesnearby[::-1].index(cl)] -# except: -# oldlabel = -2 -# try: -# labeltranslator[oldlabel] -# except KeyError: -# labeltranslator[oldlabel] = newlabel -# for lbl in peaks.classesnearbypccl: -# try: labeltranslator[lbl] -# except KeyError: labeltranslator[lbl] = lbl -# old_peaklist.classesnearbypccl = [labeltranslator[lbl] for lbl in old_peaklist.classesnearbypccl] -## print(labeltranslator) - -def fit(templ, peak): - fit = np.sum(np.square(templ.cut - peak.cut)) - return fit - -def fitinterpol2(templ,peak): - t = templ - p = peak - if p.real and t.real: - fit = np.sum(np.square(t.cutaligned-p.cutaligned)) - else: - fit = 0 - return fit - - - -def fitinterpol( templ, peak): - t = templ - p = peak - if p.real: - centerp = cutcenter(p) - centert = cutcenter(t) - shiftp = centerp-p.cutsize/2 - shiftt = centert-t.cutsize/2 - - if shiftp > -5: - shiftp = min(5, 5+centerp-p.cutsize/2) - else: shiftp = 0 - - if shiftt > -5: - shiftt = min(5, 5+centert-t.cutsize/2) - else: shiftt = 0 - - xnew = np.linspace(0,p.cutsize-11, (p.cutsize-1) * 4,endpoint = True) - #peak_interpoled = interpol(p.cut)(xnew) - #plt.plot(xnew, interpol(p.cut)(xnew+shift)) - # #print(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp)) - fit = np.sum(np.square(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp))) - else: - fit = 0 - return fit - - -def plotdata(peaks, data): - x = xarray(peaks) - y = yarray(peaks) - plt.plot(range(len(data)),data) - plt.plot(x, y, '.r', ms=20) - #for p in peaks: - # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) - # plt.plot(tr, data[tr], '.g', ms=20) - plt.show() - - -def plotdatabyx(peaksx, data): - x = peaksx - y = data[peaksx] - plt.plot(range(len(data)),data) - plt.plot(x, y, '.r', ms=20) - plt.show() - #for p in peaks: - # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) - # plt.plot(tr, data[tr], '.g', ms=20) - -def plotpeak(peaks): - #plt.plot(peaks), cutpeaks) #bei betrachtung aller blocks zu groß! 
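A minimal sketch of the sum-of-squared-differences comparison that fit() and fitinterpol2() above perform on snippet cut-outs (the arrays are made-up stand-ins for a template cut and a candidate cut of equal length; this is an added illustration, not code from the module):

import numpy as np

# made-up, already aligned snippets of equal length (in the module these are
# the Peak.cut / Peak.cutaligned arrays)
template_cut = np.array([0.0, 0.2, 1.0, 0.3, -0.4, 0.0])
candidate_cut = np.array([0.0, 0.1, 0.9, 0.4, -0.5, 0.1])

# smaller error = better match; classifyhiker() accepts a peak into the
# template's class when this error falls below the mean error over all peaks
sse = np.sum(np.square(template_cut - candidate_cut))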
- for p in peaks: - plt.plot(range(p.cutsize),p.cut) - #plt.plot(pk, x[pk] , '.r', ms=20) - plt.show() - - -def periodicinclass(peaks, cl): - noiselist = [] - classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaks) - peaks = xarray(peaks) - peaks = peaks[:][classlist == cl] - periodic = [] - periodiccollector = [] - error2 = [] - isperiodic = True - b=1 - c=2 - ctofar = False - compdif = 0 - dif = 0 - count = 1 - foundtriple = False - next = 0 - for i in range(len(peaks)-1): - if i != next: continue - # #print(i, 'foundtriple', foundtriple) - error2 = [] - b=1 - c=0 - A = peaks[i] - B = peaks[i+b] - compdif = dif - while foundtriple == True and count <= 3 and i+1 < len(peaks)-1: - while B-A < compdif*1.5 and i+b+1 < len(peaks)-1: - # #print('newdif: ', B-A, 'olddif:' , dif) - if abs((B-A) - compdif) < compdif*0.4: - error2.append(abs((B-A) - dif)) - b+=1 - B = peaks[i+b] - if len(error2) > 0: - bestB = error2.index(min(error2)) - B = peaks[i+1 + bestB] - periodic.append(B) - dif = 0.5*(dif + (B-A)) - # #print('match found') - b = 1+bestB - break - else: - count+=1 - compdif = dif*count - else: - if foundtriple == True: - # #print('no further match found, ') - isperiodic = False - - - - - while foundtriple == False and i+c< len(peaks)-1: - while i+c < len(peaks)-1: - A = peaks[i] - B = peaks[i+b] - C = peaks[i+c] - dif1 = B - A - dif2 = C - B - if (C-B > (B-A)*1.5): - break - if abs(dif1 - dif2) < dif1*0.4: - error2.append(abs(dif1-dif2)) - c +=1 - #C = peaks[i+c] # C weiterlaufenlassen, bis zu weit - else: - if len(error2) == 0: - # #print('no triple found') - isperiodic = False - if len(error2) > 0: - bestC = error2.index(min(error2)) - C = peaks[i+2 + bestC] - c = 2+ bestC - periodic.extend((A,B,C)) - dif1 = B - A - dif2 = C - B - # #print('dif1: ', dif1, 'dif2: ', dif2) - dif = 0.5*(dif2+dif1) - foundtriple = True - # #print('triple found', i+c, 'dif : ', dif) - else: - error2 = [] # B weiterlaufen lassen, C reset auf B+1 - b +=1 - c = b+1 - - if isperiodic == False: - if len(periodic) > 3: - periodiccollector.append(periodic) - isperiodic = True - periodic = [] - if c!=0: - next = i+c - else: - next = i+b - if len(periodiccollector) > 0: - # for i in range(len(periodiccollector)): - # #print('collector ', i, periodiccollector[i]) - return periodiccollector - else: - #print('no periodicity found') - return [] - - - -def noisediscard(peaklist, tsh_n, ultimate_threshold): - detected_noise = False - ##print('noisetsh: ', tsh_n) - for p in peaklist.list: - - if p.height < tsh_n or p.height < ultimate_threshold: - p.noise = True - detected_noise = True - peaklist.list = peaklist.list[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaklist.list) == False] - # #print(peaks) - # for cl in classlist: - # diff = np.vectorize(lambda peak: peak.x, otypes=[object])(peaks[:][classlist == cl]) - # meandiff = np.mean(diff) - # msecompare = np.mean(np.square(diff-(diff*0.8))) - # mse = np.mean(np.square(diff-meandiff)) - # if mse > msecompare: - # noiselist.append(cl) - # for p in peaks: - #if p.cl in noiselist: - # if p.height < 0.1: - # p.noise = True - # peaks = peaks[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaks) == False] - # return peaks - return detected_noise - - -def plotPCclasses_ref(peaks, data): - plt.plot(range(len(data)),data, color = 'black') - print(peaks) - classlist = np.array(peaks[3],dtype = 'int') - cmap = plt.get_cmap('jet') - colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) - np.random.seed(22) - 
np.random.shuffle(colors) - colors = [colors[cl] for cl in np.unique(classlist)] - print('classlist', np.unique(classlist)) - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) - # x=0 -# if len(classlist)>0: - # #print(classlist) - # #print('classes: ' , np.unique(classlist)) - #from collections import Counter - #count = Counter(classlist) - # #print('longest class: ', count.most_common()[0]) - for num, color in zip(np.unique(classlist), colors): - if num == -1 : - color = 'black' - peaksofclass = peaks[:,classlist == num] - print(num) - plt.plot(peaksofclass[0], peaksofclass[1], '.', color = color, ms =20) - #plt.scatter(peaks[0], peaks[2]) - # for p in peaks: - # plt.text(p.x, p.y, p.num) - #plt.show() - - print('show pcclasses') - plt.show() - plt.close() - -def plotampwalkclasses_refactored(peaks, data): - plt.plot(range(len(data)),data, color = 'black') - classlist = np.array(peaks[3],dtype=np.int) - cmap = plt.get_cmap('jet') - colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) - np.random.seed(22) - np.random.shuffle(colors) - colors = [colors[cl] for cl in np.unique(classlist)] - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) - # x=0 -# if len(classlist)>0: - # #print(classlist) - # #print('classes: ' , np.unique(classlist)) - #from collections import Counter - #count = Counter(classlist) - # #print('longest class: ', count.most_common()[0]) - for cl, color in zip(np.unique(classlist), colors): - peaksofclass = peaks[:,classlist == cl] - #xpred = linreg_pattern(peaksofclass[0:3]) - #for p in peaksofclass[0:3]: - # #print(p.x) - ##print(xpred, peaksofclass[3].x) - - #if len(peaksofclass) > 1000: - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) - #else: - - plt.plot(peaksofclass[0],peaksofclass[1], '.', color = color, ms =20) - plt.scatter(peaksofclass[0], peaksofclass[2]) - # for p in peaks: - # plt.text(p.x, p.y, p.num) - plt.show() - - # plt.show() - plt.close() - - -def crosscorrelation(sig, data): - autocorr = signal.fftconvolve(data, sig[::-1], mode='valid') - return autocorr - -def plottemplatefits(data, peaks, tr, templnum): - # - plotdata(peaks, data, tr) - plt.plot(range(len(data)),data) - classes = np.vectorize(lambda peak: peak.currentclass, otypes=[object])(peaks) - class1 = peaks[:][classes == 1 ] - if len(class1) > 0: - plt.plot(xarray(class1), yarray(class1), '.r', ms=20) - class2 = peaks[:][classes == 2 ] - if len(class2) > 0: - plt.plot(xarray(class2), yarray(class2), '.g', ms=20) - class3 = peaks[:][classes == 3 ] - if len(class3) > 0: - plt.plot(xarray(class3), yarray(class3), '.c', ms=20) - class4 = peaks[:][classes == 4 ] - if len(class4) > 0: - plt.plot(xarray(class4), yarray(class4), '.y', ms=20) - - # for p in peaks: # <-- - # plt.text(p.x , p.y, p.num) - - # plt.plot(tr, data[tr], '.g', ms=20) - plt.show() - -def linreg_pattern(peaks): - from sklearn import datasets, linear_model - from sklearn.metrics import mean_squared_error, r2_score - - peaksx = xarray(peaks) - peaksx = peaksx.reshape(-1,1) - #peaksh = heightarray(peaks) - #peakx = peak.x - # Create linear regression object - regr = linear_model.LinearRegression() - numbers = np.arange(len(peaks)).reshape(-1,1) - # Train the model using the training sets - regr.fit(numbers, peaksx) - - # Make predictions using the testing set - peakx_pred = regr.predict(len(peaks)) - # # The coefficients - # #print('Coefficients: \n', regr.coef_) - # # The mean squared error - # #print("Mean squared error: %.2f" - # % 
mean_squared_error(diabetes_y_test, diabetes_y_pred)) - # # Explained variance score: 1 is perfect prediction - # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) - - - # Plot outputs - #plt.scatter(peaksx, peaksh, color='black') - #plt.scatter(peakx, peakh_pred, color='blue') - - #plt.xticks(()) - #plt.yticks(()) - - # plt.show() - - return peakx_pred - -def linreg(peaks, peak): - from sklearn import datasets, linear_model - from sklearn.metrics import mean_squared_error, r2_score - - peaksx = xarray(peaks) - peaksx = peaksx.reshape(-1,1) - peaksh = heightarray(peaks) - peakx = peak.x - # Create linear regression object - regr = linear_model.LinearRegression() - - # Train the model using the training sets - regr.fit(peaksx, peaksh) - - # Make predictions using the testing set - peakh_pred = regr.predict(peakx) - - # # The coefficients - # #print('Coefficients: \n', regr.coef_) - # # The mean squared error - # #print("Mean squared error: %.2f" - # % mean_squared_error(diabetes_y_test, diabetes_y_pred)) - # # Explained variance score: 1 is perfect prediction - # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) - - - # Plot outputs - #plt.scatter(peaksx, peaksh, color='black') - #plt.scatter(peakx, peakh_pred, color='blue') - - #plt.xticks(()) - #plt.yticks(()) - - # plt.show() - - - - return peakh_pred - -def wp_transform(x): - import pywt - wp = pywt.WaveletPacket(data=x, wavelet='haar', mode='symmetric') - print('maxlevel: ', wp[''].maxlevel) - return (np.array([node.data for node in wp.get_level(wp[''].maxlevel, 'freq')])).flatten() - -def wpfeats(snips): - size = len(wp_transform(snips[0])) - wp = np.empty([len(snips), size]) - for i, snip in enumerate(snips): - print(wp_transform(snip)) - wp[i] = (wp_transform(snip)) - #wp = wp.T - print(wp[0]) - wpcoef = wp.T - print(wp[0]) - from sklearn.preprocessing import StandardScaler - wpcoef = StandardScaler().fit_transform(wpcoef) - coeffvalues = [] - for coeff in wpcoef: - stat, crit, sig = stats.anderson(coeff, dist = 'norm') - # coeffvalues.append(stat) - coeffvalues.append(np.sum(np.abs(coeff))) - coeffvalues = np.array(coeffvalues) - coeffs = np.argsort(coeffvalues)[::-1][:10] - print(coeffvalues[coeffs]) - return wp.T[coeffs] - - - - -def pc(cutsnippets, peaklist): - # (observations, features) matrix - M = np.empty([len(cutsnippets), len(cutsnippets[0])]) - for i, snip in enumerate(cutsnippets): - M[i] = snip[:] - from sklearn.preprocessing import StandardScaler - StandardScaler().fit_transform(M) - # #print(M.shape, ' Mshape') - # singular value decomposition factorises your data matrix such that: - # - # M = U*S*V.T (where '*' is matrix multiplication) - # - # * U and V are the singular matrices, containing orthogonal vectors of - # unit length in their rows and columns respectively. - # - # * S is a diagonal matrix containing the singular values of M - these - # values squared divided by the number of observations will give the - # variance explained by each PC. - # - # * if M is considered to be an (observations, features) matrix, the PCs - # themselves would correspond to the rows of S^(1/2)*V.T. if M is - # (features, observations) then the PCs would be the columns of - # U*S^(1/2). - # - # * since U and V both contain orthonormal vectors, U*V.T is equivalent - # to a whitened version of M. - - U, s, Vt = np.linalg.svd(M, full_matrices=False) - V = Vt.T - - # PCs are already sorted by descending order - # of the singular values (i.e. 
by the - # proportion of total variance they explain) - S = np.diag(s) - # PC = (s*V) - # PCs: - #print(U.shape) - #print(S.shape) - #print(V.shape) - #print(s[0], U[0,:]) - - #PC1 = (s[0] * U[:,0]) - #PC2 = (s[1] * U[:,1]) - #for i, p in enumerate(peaklist): - # p.pc1 = PC1[i] - # p.pc2 = PC2[i] - - #mu = peaks.mean(axis=0) - #fig, ax = plt.subplots() - #ax.scatter(xData, yData) - #for axis in U: - # start, end = mu, mu + sigma * axis - # ax.annotate( - # '', xy=end, xycoords='data', - # xytext=start, textcoords='data', - # arrowprops=dict(facecolor='red', width=2.0)) - #ax.set_aspect('equal') - #plt.show() - - - # if plot_steps: - # plt.scatter(PC1, PC2) - # plt.show() - - # PCData1 = (U[:,0]*M) - # PCData2 = (U[:,1]*M) - # plt.scatter(PCData1, PCData2) - # plt.show() - - #plt.scatter(U[:,0],U[:,1]) - #plt.show() - #print('done') - #return PC - - # if we use all of the PCs we can reconstruct the noisy signal perfectly - #Mhat = np.dot(U, np.dot(S, V.T)) - #print('Using all PCs, MSE = %.6G' %(np.mean((M - Mhat)**2))) - - #plt.show() - return S@U.T - -def gettime(x, samplerate, starttime): - startm = int(starttime[-2:]) - starth = int(starttime[:-2]) - seconds = x/samplerate - m, s = divmod(seconds, 60) - m = m + startm - h, m = divmod(m, 60) - h = h+starth - return "%d:%02d:%02d" % (h, m, s) - -#def connect_blocks(oldblock): -# newblock = Peaklist([]) -# newblock.lastofclass = oldblock.lastofclass -# newblock.lastofclassx = oldblock.lastofclassx -# newblock.classesnearby = oldblock.classesnearby -# newblock.classesnearbypccl = oldblock.classesnearbypccl -# newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx] -# return newblock -# ##print('classesnearbyx! old, new ' , oldblock_len,oldblock.classesnearbyx , newblock.classesnearbyx) - -if __name__ == '__main__': - main() - - - -# deleted Code, but unsure if really want to delete: - - #nix #print( b.data_arrays) - - # for cl in np.unique(cllist): - - # currentfish_x = x[:][cllist == cl] - # currentfish_y = y[:][cllist == cl] - # currentfish_h = x[:][cllist == cl] - - - #nix try: - #nix xpositions[cl] = b.create_data_array("f%d_eods" %cl, "spiketimes", data = currentfish_x) - #nix xpositions[cl].append_set_dimension() - #nix # thisfish_eods = b.create_multi_tag("f%d_eods_x"%cl, "eods.position", xpositions[cl]) - #nix # thisfish_eods.references.append(nixdata) - #nix except nix.pycore.exceptions.exceptions.DuplicateName: - #nix - #nix xpositions[cl].append(currentfish_x) - - - #thisfish_eods.create_feature(y, nix.LinkType.Indexed) - #b.create_multi_tag("f%d_eods_y"%cl, "eods.y", positions = y) - #b.create_multi_tag("f%d_eods_h"%cl, "eods.amplitude", positions = h) - #thisfish_eods.create_feature - - - - -# in analyseEods -# in analyseEods classlist = eods[3] #np.vectorize(lambda peak: peak.cl, otypes=[object])(worldpeaks.list) -# in analyseEods fishclass = {} -# in analyseEods #print('classlist: ', classlist) -# in analyseEods # #print('Classes at end: ', np.unique(classlist)) -# in analyseEods -# in analyseEods -# in analyseEods fishes = {} -# in analyseEods for num in np.unique(classlist): -# in analyseEods fishes[num] = eods[:,:][: , classlist == num] -# in analyseEods -# in analyseEods -# in analyseEods -# in analyseEods -# in analyseEods fishes = fill_hidden_3(fishes) # cl-dict : x y z -dict -# in analyseEods #maxlencl = max(fishes, key=lambda k: fishes[k]['x'][-1]-fishes[k]['x'][0]) -# in analyseEods -# in analyseEods fishes, weirdparts = fill_holes(fishes) -# in analyseEods fishes, weirdparts = 
fill_holes(fishes) -# in analyseEods -# in analyseEods for cl in np.unique(classlist): -# in analyseEods isi = [isi for isi in np.diff(fishes[cl]['x'])] -# in analyseEods fishes[cl][3]= isi -# in analyseEods - - -#npFish -#npFish npFishes = {} -#npFish fishfeaturecount = len(fishes[cl]) -#npFish for cl in np.unique(classlist): -#npFish npFishes[cl]= np.zeros([fishfeaturecount, len(fishes[cl]['x'])]) -#npFish for i, feature in enumerate(['x', 'y', 'h', 'isi']): #enumerate(fishes[cl]): -#npFish if feature == 'isi': -#npFish fishes[cl][feature].append(fishes[cl][feature][-1]) -#npFish # #print(feature, cl) -#npFish npFishes[cl][i] = np.array(fishes[cl][feature]) -#npFish # #print(npFishes[classlist[0]][0]) -#npFish # #print(npFishes[classlist[0]][2]) -#npFish # #print(npFishes[classlist[0]][3]) -#npFish #np.savetxt('worldpeaks_x_y_cl_2', (x,y,cl, isi), fmt="%s") -#npFish -#npFish np.set_printoptions(threshold=np.nan) -#npFish -#npFish for i, cl in enumerate(np.unique(classlist)): #Neue Klassennamen! -#npFish x = npFishes[cl][0] -#npFish y = npFishes[cl][1] -#npFish h = npFishes[cl][2] -#npFish isi = npFishes[cl][3] -#npFish -#npFish np.savetxt(filename[:-4]+'Fish_xyhisi_cl%d' % i, npFishes[cl], fmt="%s") -#npFish -#npFish -#npFish - - - - - - # / TODO: Peakclassifikator bei weit wegliegenden klassen? Done - # / TODO: Class2 implementation auf class linreg übertragen Done - Doof - # TODO: Klassen zusammenfuegen/ Noise zusammenfuegen - # - Wenn last 3 und first 3 zueinander passen in 1. Amplitude und 2. Periode (falls peaks) oder 2. randomzeugs? - Noiseerkennung und 2. Amplitude - # TODO: Klassen filtern auf Patternausreißer - # diff --git a/thunderfish/DextersThunderfishAddition/analyseDexThinned.py b/thunderfish/DextersThunderfishAddition/analyseDexThinned.py deleted file mode 100644 index aa131ad7..00000000 --- a/thunderfish/DextersThunderfishAddition/analyseDexThinned.py +++ /dev/null @@ -1,2262 +0,0 @@ -# Script to detect and classify EODs in recordings of weakly electric pulse -# fish, Dexter Früh, 2018 -# -# it is suggested to save the recording in -# workingdirectory/recording/recording.WAV - -# results will be saved in workingdirectory/recording/ -# -# input: -# - [Recorded Timeseries] recording.WAV -# outputs(optional): -# - [Detected and Classified EODs] -# (Numpy Array with Shape (Number of EODs, 4 (Attributes of EODs)), -# with the EOD-Attributes -# - x-location of the EOD -# (time/x-coordinate/datapoint in recording) -# - y-location of the EOD -# (Amplitude of the positive peak of the pulse-EOD) -# - height of the EOD(largest distance between peak and through in the EOD) -# - class of the EOD -# eods_recording.npy -# - [plots of the results of each analyse step for each -# analysepart (timeinterval of length = deltat) of the recording] -# -# required command line arguments at function call -# - save : if True, save the results to a numpy file (possibly -# overwrite existing) -# - plot : if True, plot results in each analysestep -# - new : if True, do a new analysis of the recording, even if there -# is an existing analyzed .npy file with the right name. 
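For orientation on the output format described above, the saved EOD array can be inspected roughly as follows (an added sketch; the output file name and the version suffix the script appends when saving vary, and depending on the script version the array holds four attribute rows of length n, as assumed here, or the transposed layout):

import numpy as np

eods = np.load("recording/eods_recording.npy")   # hypothetical output path
# rows: x-position (samples), y (peak amplitude), height (peak-to-trough), class
x, y, height, cl = eods          # use eods.T first if stored as (n, 4)
for c in np.unique(cl):
    print("class %d: %d EODs" % (int(c), np.sum(cl == c)))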
-# -# call with: -# python3 scriptname.py save plot new (starttime endtime[sec] for only -# partial analysis) -# -# other parameters are behind imports and some hardcoded at the relevant -# codestep -import sys -import numpy as np -import copy -from scipy.stats import gmean -from scipy import stats -from scipy import signal -from scipy import optimize -import matplotlib -from fish import ProgressFish -import matplotlib.pyplot as plt -from thunderfish.dataloader import open_data -from thunderfish.peakdetection import detect_peaks -from scipy.interpolate import interp1d -from scipy.signal import savgol_filter -from collections import deque -import ntpath -import nixio as nix -import time -import os -from shutil import copy2 -from ownDataStructures import Peak, Tr, Peaklist - -from IPython import embed -# parameters for the analysis - -deltat = 30.0 # seconds of buffer size -thresh = 0.04 # minimal threshold for peakdetection -peakwidth = 20 # width of a peak and minimal distance between two EODs - -# basic parameters for thunderfish.dataloader.open_data -verbose = 0 -channel = 0 - -# timeinterval to analyze other than the whole recording -#starttime = 0 -#endtime = 0 -#timegiven = False - -def main(): # analyse_dex.py filename save plot new (optional starttime endtime [sec]) - # home = os.path.expanduser('~') - # os.chdir(home) - # defaults for optional arguments - timegiven = False - plot_steps = False - - # parse command line arguments - filepath, save, plot, new (, starttime, - # endtime) - filepath = sys.argv[1] - #thresh = 0.05 - save = int(sys.argv[2]) - plot_steps = int(sys.argv[3]) - new = int(sys.argv[4]) - if len(sys.argv[:])>5: - timegiven = True - starttime = int(sys.argv[5]) - endtime = int(sys.argv[6]) - #print(starttime, endtime) - # plot_steps = 1 - peaks = np.array([]) - troughs = np.array([]) - cutsize = 20 - maxwidth = 50 #10 - ultimate_threshold = thresh+0.01 - filename = path_leaf(filepath) - - ### ## ask user before overwriting - # if save == 1: - # proceed = input('Really want to save data and possibly overwrite existing? 
[y/n]').lower() - # if proceed == 'n': - # quit() - # elif proceed == 'y': - # printcat file | while read line - # do - #do something - # done('continuing') - # elif proceed != 'y': - # quit() - datasavepath = filename[:-4] - print(datasavepath) - eods_len = 0 - - ### ## starting analysis if it is wished or the analyzed EODs-file is not available in the working directory - if new == 1 or not os.path.exists(filename[:-4]+"/eods5_"+filename[:-3]+"npy"): - - ### ## import data - with open_data(filepath, channel, deltat, 0.0, verbose) as data: - - if save == 1 or save == 0: - # datasavepath = filename[:-4]+"/"+filename - if not os.path.exists(datasavepath): - os.makedirs(datasavepath) - copy2(filepath, datasavepath) - samplerate = data.samplerate - - ### ## split datalength into smaller blocks - nblock = int(deltat*data.samplerate) - if timegiven == True: - #print(starttime, samplerate) - parttime1 = starttime*samplerate - # parttime1 = samplerate * 10270 - parttime2 = endtime*samplerate - data = data[parttime1:parttime2] - if len(data)%nblock != 0: - blockamount = len(data)//nblock + 1 - else: - blockamount = len(data)//nblock - bigblock = [] - - ### ## output first (0%) progress bar - print('blockamount: ' , blockamount) - progress = 0 - print(progress, '%' , end = " ", flush = True) - fish = ProgressFish(total = blockamount) - olddatalen = 0 - startblock = 0 - ## iterating through the blocks, detecting peaks in each block - for idx in range(startblock, blockamount): - - ### ## print progress - if progress < (idx*100 //blockamount): - #print(progress, '%' , end = " ", flush = True) - progress = (idx*100)//blockamount - # print('.' , end = '') - progressstr = 'Partstatus: '+ str(0) + ' '*2 + ' % (' + '0' + ' '*4+ '/' + '?'+' '*4+ '), Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - progressstr = 'Partstatus: '+ 'Part ' + '0'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - - ### ## take debugging times, not used right now - time1 = time.time() - #print('took ', time1-time0, 's') - time0 = time1 - - # time measurement of parts of the algorithm to find time - # efficiency bottlenecks - bottletime = [] - bottletime.append(time.time()) #0 - - datx = data[idx*nblock:(idx+1)*nblock] - - ### ## smoothing of the timeseries and calculating autocorrelation - not used - #from scipy.signal import butter, lfilter - #datx = savgol_filter(datx, 11, 7) - #fs = samplerate # 1 ns -> 1 GHz - #cutoff = samplerate/10 # 10 MHz - #B, A = butter(5, cutoff / (fs / 3), btype='low') # 1st order Butterworth low-pass - #datx = lfilter(B, A, datx, axis=0) - #plt.plot(datx) - #plt.show() - #sig = data[-320000:-1] - #autocorr = signal.fftconvolve(sig, sig, mode='full') - #plt.plot(autocorr) - #plt.show() - #f, Pxx_den = signal.periodogram(sig, samplerate) - #plt.plot(Pxx_den) - #plt.show() - #x = savgol_filter(x, 11, 7) - - # ---------- analysis ----------- - # step1: detect peaks in timeseries - pk, tr = detect_peaks(datx, thresh) - troughs = tr - bottletime.append(time.time()) #1 - # continue with analysis only if multiple peaks are detected - if len(pk) > 2: - def makepeaklist_refactor(pk,tr,data): - ### ## create 'peaks' with x,y and height and discard peaks that seem to be no EODs based on their width and simple features like - no minimum close to the maximum. 
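For orientation, a stripped-down sketch of the buffered reading and peak-detection loop set up above, using the same open_data and detect_peaks imports as this script (progress output, saving and the classification steps are omitted); the peak shaping described in the comment above then follows in makepeaklist_refactor below:

from thunderfish.dataloader import open_data
from thunderfish.peakdetection import detect_peaks

deltat = 30.0    # seconds of buffer per analysis block (as defined above)
thresh = 0.04    # minimal peak-detection threshold (as defined above)

with open_data("recording.WAV", 0, deltat, 0.0, 0) as data:
    nblock = int(deltat * data.samplerate)
    blockamount = len(data) // nblock + (1 if len(data) % nblock else 0)
    for idx in range(blockamount):
        datx = data[idx * nblock:(idx + 1) * nblock]
        pk, tr = detect_peaks(datx, thresh)   # indices of peaks and troughs
        # ... per-block peak shaping and classification follow in the script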
- # decide whether a peak or a through is detected first - pkfirst = int((min(pk[0],tr[0])= 0 and right_tr_ind < len(tr): - # ltr_x = tr[left_tr_ind] - # ltr_y = datx[ltr_x] - # rtr_x = tr[right_tr_ind] - # rtr_y = datx[rtr_x] - if min((pk_x - ltr_x),(rtr_x -pk_x)) > peakwidth: - pk_r[...] = False - elif max((pk_x - ltr_x),(rtr_x -pk_x)) <= peakwidth: - pk_h[...] = pk_y - min(ltr_y, rtr_y) - else: - if (pk_x-ltr_x)<(rtr_x-pk_x): - pk_h[...] = pk_y-ltr_y - else: - pk_h[...] = pk_y -rtr_y - elif left_tr_ind == -1: - if rtr_x-pk_x > peakwidth: - pk_r[...] = False - else: - pk_h[...] = pk_y- rtr_y - elif right_tr_ind == len(tr): - if pk_x-ltr_x > peakwidth: - pk_r[...] = False - else: - pk_h[...] = pk_y-ltr_y - peaks = np.array([peaks_x, peaks_y, peaks_h], dtype = np.float)[:,peaks_real!=0] - return peaks - peaks = makepeaklist_refactor(pk,tr,datx) - #plt.plot(data[0:32000]) - #for ik in peaks.list[0:400]: - # plt.scatter(i.x, i.height) - #plt.show() - bottletime.append(time.time()) #2 - def discardnearbypeaks_refactor(peaks, peakwidth): - ### ## discard peaks that are close to each other, as a EOD mostly has more than one maximum and only one of the maxima is considered to be the EOD/EODlocation - unchanged = False - while unchanged == False: - x_diffs = np.diff(peaks[0]) - peaks_heights = peaks[2] - peaks_delete = np.zeros(len(peaks[0])) - for i, diff in enumerate(x_diffs): - if diff < peakwidth: - if peaks_heights[i+1] > peaks_heights[i] : - peaks_delete[i] = 1 - else: - peaks_delete[i+1] = 1 - peaks = peaks[:,peaks_delete!=1] - if np.count_nonzero(peaks_delete)==0: - unchanged = True - return peaks - peaks = discardnearbypeaks_refactor(peaks,peakwidth) -# plt.plot(datx) -# plt.scatter(peaks[0],peaks[1]) -# plt.show() -# ### ## tries to calculate the noiselevel in the current recording part. Might actually not do anything at all, because the ultimate_threshold might be larger eitherway. some recordings have some exploitable data below this threshold, but most don't. And the rate of errors just gets too big for such small peaks. 
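The merging rule implemented above, restated on its own for clarity (an illustrative re-write with made-up argument names, not the function used by the script): of two peaks that lie closer together than peakwidth samples, only the higher one survives, and the pass is repeated until no close pair remains.

import numpy as np

def discard_nearby_peaks(x, heights, peakwidth):
    # keep the higher of any two peaks closer than peakwidth samples and
    # repeat until nothing changes (mirrors discardnearbypeaks_refactor above)
    x = np.asarray(x, dtype=float)
    heights = np.asarray(heights, dtype=float)
    changed = True
    while changed:
        changed = False
        keep = np.ones(len(x), dtype=bool)
        for i, d in enumerate(np.diff(x)):
            if d < peakwidth:
                keep[i if heights[i + 1] > heights[i] else i + 1] = False
                changed = True
        x, heights = x[keep], heights[keep]
    return x, heights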
-# if len(peaks.list) > 2: -# tsh_n = calc_tsh_noise(peaks.list, datx) - bottletime.append(time.time()) #5 - # if len(peaks.list) > 2: - # noisediscard(peaks, ultimate_threshold, ultimate_threshold) - bottletime.append(time.time()) #6 - progressstr = 'Partstatus: '+ 'Part ' + '1'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - if len(peaks) > 0: - bottletime.append(time.time()) #7 - ### ## connects the current part with the one that came before, to allow for a continuous analysis - if idx >= startblock+1: - peaklist = connect_blocks(peaklist) - else: - peaklist = Peaklist([]) - bottletime.append(time.time()) #8 - #print('\n ') - #print('cut_snips, with ' ,len(peaks.list), 'peaks') - # cuts snippets from the data time series around the peaks, interpolates them and aligns them - def cut_snippets_refactor(data, peaks, rnge): - snippets = [] - positions = np.array(peaks[0],dtype=np.int) - heights = peaks[2] - intfact = 10 - alignrange = 1.5 - alignwidth = int(np.ceil(alignrange * intfact) ) - for pos in positions: - snippets.append(data[(pos+rnge[0]):(pos+rnge[1])]) - scaled_snips = np.empty_like(snippets) - for i, snip in enumerate(snippets): - top = -rnge[0] - #plt.plot(snip) - scaled_snips[i] = snip * 1/heights[i] - #plt.plot(scaledsnips[i]) - #plt.show() - aligned_snips = np.empty((len(snippets), (rnge[1]-rnge[0])* - intfact-(2*alignwidth)-intfact)) - ipoled_snips = np.empty((len(snippets), (rnge[1]-rnge[0])*intfact-intfact)) - - for i, snip in enumerate(scaled_snips): - if len(snip) < ((rnge[1]-rnge[0])): - if i == 0: - snip = np.concatenate([np.zeros([((rnge[1]-rnge[0]) - len(snip))]),np.array(snip)]) - if i == len(scaledsnips): - snip = np.concatenate([snip, np.zeros([((rnge[1]-rnge[0])-len(snip))])]) - else: - snip = np.zeros([(rnge[1]-rnge[0])]) - interpolation = interpol(snip, 'cubic') #if len(snip) > 0 else np.zeros([(rnge[1]-rnge[0]-1)*intfact ]) - interpoled_snip = interpolation(np.arange(0, len(snip)-1, 1/intfact)) - intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) - if intsnipheight == 0: - intsnipheight = 1 - interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight - ipoled_snips[i] = interpoled_snip - - mean = np.mean(ipoled_snips, axis = 0) - meantop = np.argmax(mean) - #plt.plot(mean) - #plt.show() - #plt.plot(mean[10*-rnge[0]-10*5:-10*rnge[1]+21]) - #plt.show() - for i, interpoled_snip in enumerate(ipoled_snips): - cc = crosscorrelation(interpoled_snip[alignwidth:-alignwidth], mean) - #cc = crosscorrelation(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31], mean[10*-rnge[0]-10*7:-10*rnge[1]+31]) - offset = -15 + np.argmax(cc) - interpoled_snip = interpoled_snip[15-offset:-15-offset] if offset != -15 else interpoled_snip[30:] - #plt.plot(interpoled_snip) - if len(interpoled_snip[~np.isnan(interpoled_snip)])>0: - aligned_snips[i] = interpoled_snip - #plt.show() - return snippets, aligned_snips - snips, aligned_snips = cut_snippets_refactor(datx,peaks, [-15,15]) - # snips, scaledsnips = cut_snippets(datx, peaks.list, [-15,15]) - #wpf = wpfeats(scaledsnips) - #print(wpf[0]) - #print('pc') - progressstr = 'Partstatus: '+ 'Part ' + '2'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - #print('len ', len(scaledsnips)) - #print(scaledsnips) - def pc_refactor(cutsnippets): - # (observations, features) matrix - M = np.empty([len(cutsnippets), len(cutsnippets[0])]) - for i, snip in enumerate(cutsnippets): - M[i] = snip[:] - from sklearn.preprocessing import StandardScaler - from 
sklearn.decomposition import PCA - #StandardScaler().fit_transform(M) - pca = PCA() - pc_comp= pca.fit_transform(M) - return pc_comp - # calculates principal components - pcs = pc_refactor(aligned_snips) - #print('dbscan') - - # clusters the features(principal components) using dbscan algorithm. clusterclasses are saved into the peak-object as Peak.pccl - order = 5 - minpeaks = 3 if deltat < 2 else 10 - def dbscan_refactor(pcs, peaks, order, eps, min_samples, takekm, olddatalen): - # pcs (samples, features) - # X (samples, features) - from sklearn.cluster import DBSCAN - from sklearn import metrics - from mpl_toolkits.mplot3d import Axes3D - from sklearn.cluster import AgglomerativeClustering - try: - X = pcs[:,:order] - except: - X = pcs[:,order] - # ############################################################################# - # Compute DBSCAN - db = DBSCAN(eps, min_samples).fit(X) - from sklearn.cluster import KMeans - core_samples_mask = np.zeros_like(db.labels_, dtype=bool) - core_samples_mask[db.core_sample_indices_] = True - labels = db.labels_ ##### TODO ###### --- irgendwo Indexfehler oder so, last change - pcs richtige DImension - #peaks = np.array([np.append(peaks[:,i],labels[i]) for i in range(len(peaks[0]))]) - peaks = np.append(peaks,[labels], axis = 0) - return peaks - - peaks = dbscan_refactor(pcs, peaks, order, 0.4, minpeaks, False, olddatalen) - - #plotPCclasses(peaks.list, datx) - olddatalen = len(datx) - num = 1 - #classlist = np.vectorize(lambda peak: peak.pccl, otypes=[object])(peaks.list) - #snips, scaledsnips = cut_snippets(datx, peaks.list[classlist == num], [-15,5]) - #pcs2 = pc(scaledsnips, peaks.list[classlist==num]) - #pcs2 = wpfeats(scaledsnips) - #dbscan(pcs2, peaks.list[classlist == num],4, 0.15, 15, False) - #print('Classify') - progressstr = 'Partstatus: '+ 'Part ' + '3'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - - # classifies the peaks using the data from the clustered classes and a simple amplitude-walk which classifies peaks as different classes if their amplitude is too far from any other classes' last three peaks - peaks, peaklist = ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude - # print(peaks.classlist) - print(peaks) - bottletime.append(time.time()) #9 - join_count=0 - while True and joincc(peaklist, peaks) == True and join_count < 200: - join_count += 1 - continue - # print(peaks.classlist) - bottletime.append(time.time()) #10 - - # discards all classes that contain less than mincl EODs - mincl = 6 # >=1 - peaks = smallclassdiscard(peaks, mincl) - bottletime.append(time.time()) #11 - - # discards peaks, that are too wide compared to their - # inter spike intervals and seem to be wavesfish signals - # actually... 
works in some cases - if len(peaks[0]) > 0: - peaks = discardwaves_refactor(peaks, datx) - - # plots the data part and its detected and classified peaks - if plot_steps == True: - plotampwalkclasses_refactored(peaks, datx) - bottletime.append(time.time()) #12 - - # map the analyzed EODs of the buffer part to the whole - # recording - worldpeaks = np.copy(peaks) - bottletime.append(time.time()) #13 - # change peaks location in the buffered part to the location relative to the - idx = 1 - # peaklocations relative to whole recording - worldpeaks[0] = worldpeaks[0] + (idx*nblock) - peaklist.len = idx*nblock -# for p in worldpeaks: -# = idx*nblock + p.x - bottletime.append(time.time()) #14 - bottletime.append(time.time()) #15 - # extract the relevant information from each peakobject of - # the buffered part and rearrange it as numpy array for - # computational efficienty - #x = xarray(thisblock) - #y = yarray(thisblock) - #h = heightarray(thisblock) - #cllist = clarray(thisblock) - #bottletime.append(time.time()) #16 - #thisblock_eods = np.array([x,y,h, cllist]) - #bottletime.append(time.time()) #17 - #bottletime.append(time.time()) #18 - #thisblockeods_len = len(thisblock_eods[0,:]) - thisblock_eods = np.delete(peaks,3,0) - thisblockeods_len = len(thisblock_eods[0]) - progressstr = 'Partstatus: '+ 'Part ' + '4'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - - # save the peaks of the current buffered part to a numpy-memmap on the disk - if thisblockeods_len> 0 and save == 1 or save == 0: - if idx == 0: - eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='w+', shape=(4,thisblockeods_len), order = 'F') - # fp = np.memmap(filepath[:len(filename)]+"eods_"+filename[:-3]+"npy", dtype='float32', mode='w+', shape=(4,len(thisblock_eods[0,:]))) - dtypesize = 8#4 #float32 is 32bit = >4< bytes long ---changed to float64 -> 8bit - eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', offset = dtypesize*eods_len*4, shape=(4,thisblockeods_len), order = 'F') - eods[:] = thisblock_eods - eods_len += thisblockeods_len - bottletime.append(time.time()) #19 - #classes.extend(np.unique(cllist)) - - # to clean the plt buffer... 
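The append-by-offset bookkeeping above condenses to the following pattern (a sketch with a made-up file name and fake blocks; float64 entries are 8 bytes, and Fortran order keeps each (4, n) block's columns contiguous, so a new block can be written at a byte offset right behind the previous ones):

import numpy as np

path = "eods_example.npmmp"   # hypothetical file name
eods_len = 0
for block in (np.random.rand(4, 5), np.random.rand(4, 3)):   # fake (4, n) blocks
    n = block.shape[1]
    mode = 'w+' if eods_len == 0 else 'r+'
    mm = np.memmap(path, dtype='float64', mode=mode,
                   offset=8 * 4 * eods_len, shape=(4, n), order='F')
    mm[:] = block
    eods_len += n

# read everything back once all blocks are written
eods = np.memmap(path, dtype='float64', mode='r', shape=(4, eods_len), order='F')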
- plt.close() - - # get and print the measured times of the algorithm parts for the - # current buffer - bottletime.append(time.time())#20 - time_a= bottletime[0] - for i, times in enumerate(bottletime): - #print('times: ' ,i, times-time_a) - time_a=times - - progressstr = 'Partstatus: '+ 'Part ' + '5'+ '/''5'+' Filestatus:' - fish.animate(amount = idx, dexextra = progressstr) - # plt.show() - - # after the last buffered part has finished, save the memory mapped - # numpy file of the detected and classified EODs to a .npy file to the - # disk - eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', shape=(4,eods_len), order = 'F') - print('before final saving: print unique eodcl: ' , np.unique(eods[3])) - if save == 1: - # #print('eods', eods[3]) - path = filename[:-4]+"/" - if not os.path.exists(path): - os.makedirs(path) - if eods_len > 0: - print('Saved!') - np.save(filename[:-4]+"/eods8_"+filename[:-3]+"npy", eods) - else: - #np.save(filename[:-4]+"/eods5_"+filename[:-3]+"npy", thisblock_eods) - print('not saved') - - else: # if there already has been a certain existing result file and 'new' was set to False - print('already analyzed') - - - # not used data implementation using NIX - # Save Data - - # Needed: - # Meta: Starttime, Startdate, Length - # x, y, h, cl, difftonextinclass -> freq ? , - - # Later: Find "Nofish" - # Find "Twofish" - # Find "BadData" - # Find "Freqpeak" - # ? Find "Amppeak" - # - - # bigblock = np.array(bigblock) - # x=xarray(bigblock) - # y=yarray(bigblock) - # cl=clarray(bigblock) - - - #nix file = nix.File.open(file_name, nix.FileMode.ReadWrite) - #nix b = file.blocks[0] - #nix nixdata = b.data_arrays[0] - #nix cldata = [] - #nix #print(classes) - #nix #print(b.data_arrays) - #nix for i in range(len(np.unique(classes))): - #nix cldata.append(b.data_arrays[i+1]) - - - # for cl in - - # for cl in - # x = thisfish_eods - - - #nix file.close() - -def path_leaf(path): - ntpath.basename("a/b/c") - head, tail = ntpath.split(path) - return tail or ntpath.basename(head) - -def fill_hidden(fishclasses): - - fishes = fishclasses - - nohidefishes = {} - for cl in fishes: - x =[] - y = [] - h = [] - fish = fishes[cl] - # #print('fish', fish) - fishisi = calcisi(fish) - isi = fishisi[0] - for i, newisi in enumerate(fishisi): - leftpeak = fish[i] - x.append(leftpeak.x) - y.append(leftpeak.y) - h.append(leftpeak.height) - if newisi > 2.8*isi: - guessx = leftpeak.x + isi - - while guessx < leftpeak.x + newisi-0.8*isi: - - peakx = peakaround(guessx, isi*0.1, fishes) - if peakx is not None: - x.append(peakx) - y.append(leftpeak.y) - h.append(leftpeak.height) - guessx = peakx+ isi + (peakx-guessx) - - continue - break - isi = newisi - nohidefishes[cl]= {'x':x,'y':y,'h':h} - return nohidefishes - -def plotheights(peaklist): - heights = heightarray(peaklist) - x_locations = xarray(peaklist) - plt.scatter(x_locations, heights) - plt.show() - -def ploteods(eods, data): - plt.plot(range(len(data)),data, color = 'black') - classlist = eods[3] - cmap = plt.get_cmap('jet') - colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) - np.random.seed(22) - np.random.shuffle(colors) - colors = [colors[cl] for cl in np.unique(classlist)] - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) - x=0 - if len(classlist)>0: - # #print(classlist) - # #print('classes: ' , np.unique(classlist)) - from collections import Counter - count = Counter(classlist) - # #print('longest class: ', count.most_common()[0]) - for num, color in 
zip(np.unique(classlist), colors): - peaksofclass = eods[:,:][:, classlist == num] - #xpred = linreg_pattern(peaksofclass[0:3]) - #for p in peaksofclass[0:3]: - # #print(p.x) - ##print(xpred, peaksofclass[3].x) - - #if len(peaksofclass) > 1000: - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) - #else: - plt.plot(peaksofclass[0], peaksofclass[1], '.', color = color, ms =20) - plt.show() - -def fill_hidden_3(fishes): - - fishes = fishes - - nohidefishes = {} - for cl, fish in fishes.items(): - x =[] - y = [] - h = [] - # fish = fishes[cl] passt net, fishes is np.array mit (cl, (xyh)) - fishisi = np.diff(fish[0]) - isi = fishisi[0] - for i, newisi in enumerate(fishisi): - leftpeak = i - x.append(fish[0][i]) - y.append(fish[1][i]) - h.append(fish[2][i]) - # #print(cl, fish[0][i], isi, newisi) - if newisi > 2.8*isi: - guessx = fish[0][i] + isi - - while guessx < fish[0][i] + newisi-0.8*isi: - - peakx = peakaround3(guessx, isi*0.1, fishes) - if peakx is not None: - # #print(jup) - x.append(peakx) - y.append(fish[1][i]) - h.append(fish[2][i]) - guessx = peakx+ isi + (peakx-guessx) - - continue - break - isi = newisi - nohidefishes[cl]= {'x':x,'y':y,'h':h} - - return nohidefishes - -def peakaround2(guessx, interval, fishes): - found = False - for cl, fish in fishes.items(): - for px in fish['x']: - distold = interval - if px < guessx-interval: - continue - # #print('in area', guessx-interval) - if guessx-interval < px < guessx+interval: - found = True - dist = px-guessx - if abs(dist) < abs(distold): - distold = dist - if px > guessx+interval: - if found == True: - # #print(guessx, dist) - return guessx + dist - else: break - return None - -def peakaround3(guessx, interval, fishes): - found = False - for cl, fish in fishes.items(): - for px in fish[0]: - distold = interval - if px < guessx-interval: - continue - # #print('in area', guessx-interval) - if guessx-interval < px < guessx+interval: - found = True - dist = px-guessx - if abs(dist) < abs(distold): - distold = dist - if px > guessx+interval: - if found == True: - # #print(guessx, dist) - return guessx + dist - else: break - return None - -def peakaround(guessx, interval, fishes): - found = False - for cl, fish in fishes.items(): - for peak in fish: - - distold = interval - if peak.x < guessx-interval: - continue - # #print('in area') - if guessx-interval < peak.x < guessx+interval: - found = True - dist = peak.x-guessx - if abs(dist) < abs(distold): - distold = dist - if peak.x > guessx+interval: - if found == True: - # #print(guessx, dist) - return guessx + dist - else: break - return None - -def fill_holes(fishes): #returns peakx, peaky, peakheight # Fills holes that seem to be missed peaks in peakarray with fake (X/Y/height)-Peaks - retur = {} - lost = {} - for cl, fish in fishes.items(): - fishisi = np.diff(fish['x']) - mark = np.zeros_like(fishisi) - isi = 0 - ##print('mark', mark) - # #print('fishisi' , fishisi) - #find zigzag: - c=0 - c0= 0 - n=0 - for i, newisi in enumerate(fishisi): - if abs(newisi - isi)>0.15*isi: - if (newisi > isi) != (fishisi[i-1] > isi): - c+=1 - # #print(abs(newisi - isi), 'x = ', fish[i].x) - c0+=1 - elif c > 0: - n += 1 - if n == 6: - if c > 6: - # print ('zigzag x = ', fish['x'][i-6-c0], fish['x'][i-6]) - mark[i-6-c0:i-6]= -5 - c = 0 - c0=0 - n = 0 - - #if c > 0: - # #print(i, c) - # if c == 6: - # #print('zigzag!') - isi = newisi - isi = 0 - for i, newisi in enumerate(fishisi): - ##print('mark: ' , mark) - if mark[i] == -5: continue - if i+2 >= len(fishisi): - continue 
- if (2.2*isi > newisi > 1.8*isi) and (1.5*isi>fishisi[i+1] > 0.5*isi) : - mark[i] = 1 - isi = newisi - # #print('found 1!' , i) - elif (2.2*isi > newisi > 1.8*isi) and (2.2*isi> fishisi[i+1] > 1.8*isi) and (1.5*isi > fishisi[i+2] > 0.5*isi): - mark[i] = 1 - isi = isi - elif 3.4*isi > newisi > 2.6*isi and 1.5*isi > fishisi[i+1] > 0.5*isi: - mark[i] = 2 - - elif (0.6* isi > newisi > 0): - # #print('-1 found', i ) - if mark[i] ==0 and mark[i+1] ==0 and mark[i-1]==0 : - # isi = newisi - # continue - # #print('was not already set') - if fishisi[i-2] > isi < fishisi[i+1]: - mark[i] = -1 - # #print('-1') - elif isi > fishisi[i+1] < fishisi[i+2]: - mark[i+1] = -1 - # #print('-1') - isi = newisi - filldpeaks = [] - x = [] - y = [] - h = [] - x_lost=[] - y_lost=[] - h_lost=[] - # #print('filledmarks: ', mark) - for i, m in enumerate(mark): - if m == -1 : - # #print('-1 at x = ', fish['x'][i]) - continue - if m == -5: - x_lost.append(fish['x'][i]) - y_lost.append(fish['y'][i]) - h_lost.append(fish['h'][i]) - x.append(fish['x'][i]) - y.append(fish['y'][i]) - h.append(fish['h'][i]) - continue - x.append(fish['x'][i]) - y.append(fish['y'][i]) - h.append(fish['h'][i]) - if m == 1: - # #print('hofly added peak at x = ' , fish['x'][i]) - x.append(fish['x'][i] + fishisi[i-1]) - y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) - h.append(0.5*(fish['h'][i]+fish['h'][i+1])) - elif m== 2: - x.append(fish['x'][i] + fishisi[i]) - y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) - h.append(0.5*(fish['h'][i]+fish['h'][i+2])) - x.append(fish['x'][i] + 2*fishisi[i-1]) - y.append( 0.5*(fish['y'][i]+fish['y'][i+2])) - h.append(0.5*(fish['h'][i]+fish['h'][i+2])) - # #print('added at x = ', fish['x'][i] + fishisi[i]) - retur[cl] = {'x':x,'y':y,'h':h} - lost[cl] = {'xlost':x_lost,'ylost':y_lost,'hlost':h_lost} - # filledpeaks =np.array(filledpeaks) - # #print(filledpeaks.shape) - # filledpeaks. - return retur, lost - -def calc_tsh_noise(peaks, data): - heights = np.vectorize(lambda peak: peak.height)(peaks) - # peakx = xarray(peaks) - # peakxlist = peakx.tolist() - # #print('datenstdanfang: ', np.std(data)) - # datatsh = np.mean(np.abs(data))# - # datatsh = 2* np.std(data) - # peakareas = [i for x in peakx for i in range(x-10, x+10) if (i < len(data))] - # peakareas = np.arange(peakx-10, peakx+10, 1) - # relevantdata = [] - #peakareas = np.unique(peakareas) - # #print(len(peakareas), len(data), ' len peakarea and data' , datatsh) - #relevantdata is the data without the areas around the peaks, to calculate the standard deviation of the noise - #c = 0 - tsh = 0.1*np.std(heights) - - #for i, dat in enumerate(data): - # if peakareas[c] == i and c dist: - # dist = tdist - #print('dist', dist) - if dist>=0: - valid = True - if olddatalen > 0: - alignlabels(labels, peaks, olddatalen) - for i, p in enumerate(peaklist): - pcclasses[peaknum] = labels[i] - return valid - if takekm: - km = KMeans(n_clusters=3, n_init = 3, init = 'random', tol=1e-5, random_state=170, verbose = True).fit(X) - core_samples_mask = np.zeros_like(km.labels_, dtype=bool) - labels = km.labels_ - if takekm: - for i, p in enumerate(peaklist): - # print('label ', labels[i]) - pcclasses[peaknum] = p.pccl - # Number of clusters in labels, ignoring noise if present. - n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) - #print('Estimated number of clusters: %d' % n_clusters_) - # ############################################################################# - # Plot result - # Black removed and is used for noise instead. 
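Stripped of the plotting and of the KMeans/agglomerative experiments, the feature extraction and clustering idea boils down to this sketch (random numbers stand in for the aligned, height-scaled EOD snippets; the number of components, eps and min_samples echo the values used in this script but would need tuning on real recordings):

import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN

snippets = np.random.randn(200, 290)   # stand-in for (n_snippets, n_samples)

features = PCA().fit_transform(snippets)[:, :5]                  # first PCs as features
labels = DBSCAN(eps=0.4, min_samples=10).fit(features).labels_   # label -1 marks noise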
- unique_labels = set(labels) - colors = [plt.cm.Spectral(each) - for each in np.linspace(0, 1, len(unique_labels))] - fig = plt.figure() - ax = fig.add_subplot(111, projection = '3d') - for k, col in zip(unique_labels, colors): - if k == -1: - # Black used for noise. - col = [0, 0, 0, 1] - class_member_mask = (labels == k) - xy = X[class_member_mask] - # print(col) - ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), - markeredgecolor='k', markersize=14) - ax.set_title('Estimated number of clusters: %d' % n_clusters_) - #plt.show() - - - from sklearn.neighbors import kneighbors_graph - knn_graph = kneighbors_graph(X, 15, include_self=False) - ac = AgglomerativeClustering(linkage = 'complete', n_clusters = 3, connectivity = knn_graph).fit(X) - core_samples_mask = np.zeros_like(ac.labels_, dtype=bool) - labels = ac.labels_ - if takekm: - for i, p in enumerate(peaklist): - print('label ', labels[i]) - pcclasses[peaknum] = labels[i] - # Number of clusters in labels, ignoring noise if present. - n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) - #print('Estimated number of clusters: %d' % n_clusters_) - # ############################################################################# - # Plot result - # Black removed and is used for noise instead. - unique_labels = set(labels) - colors = [plt.cm.Spectral(each) - for each in np.linspace(0, 1, len(unique_labels))] - fig = plt.figure() - ax = fig.add_subplot(111, projection = '3d') - for k, col in zip(unique_labels, colors): - if k == -1: - # Black used for noise. - col = [0, 0, 0, 1] - class_member_mask = (labels == k) - xy = X[class_member_mask] - print(col) - ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), - markeredgecolor='k', markersize=14) - ax.set_title('Estimated number of clusters: %d' % n_clusters_) - #plt.show() - -def ampwalkclassify3_refactor(peaks,peaklist): # final classificator - classamount = peaklist.classamount - # for i in range(start, len(peaks)-start): - lastofclass = peaklist.lastofclass # dict of a lists of the last few heightvalues of a class, f.E ((1,[0.7,0.68,0.71]), (5, [0.2, 0.21, 0.21])) - lastofclassx = peaklist.lastofclassx # dict of a list of the last few x-values of a class - a=0 - elem = 0 - thresholder = [] - comperr = 1 - classesnearby = peaklist.classesnearby # list of the classes of the last n peaks (currently 12) f.E:[1,2,1,2,1,3,2,1,...] - classesnearbyx = peaklist.classesnearbyx # list of the x-values of the last n peaks, f.E:[13300, 13460, 13587, 13690, 13701, ...] 
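Before the walk itself (continued below), the core acceptance test can be illustrated in isolation: a peak may join one of the nearby classes tracked in these buffers if its height matches that class's recent mean height within a fixed factor, compared on a log2 scale (the numbers here are made up):

import numpy as np

factor = 1.6              # acceptance factor, as used below
class_mean_height = 0.50  # mean of the class's last few peak heights
peak_height = 0.65        # height of the peak to be classified

log_error = abs(np.log2(peak_height) - np.log2(class_mean_height))
accepted = log_error < np.log2(factor)   # True: 0.65 is within a factor 1.6 of 0.50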
- classesnearbypccl = peaklist.classesnearbypccl # list of the pc-classified classes of the last n peaks - classes = np.zeros((len(peaks[0]))) - pcclasses = peaks[3] - positions = peaks[0] - heights = peaks[1] - - # #print('nearbyclasses at start:' ,classesnearby, classesnearbyx) - # for peak in peaks: - # peak.cl = peak.pccl+2 - # peaklist.classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) - # return peaks - cl = 0 - maxdistance = 30000 # Max distance to possibly belong to the same class - factor = 1.6 # factor by which a peak fits into a class, f.E: classheight = 1 , factor = 2 => peaks accepted in range (0.5,2) - c=0 - peakamount = len(peaks.T) - #fish = ProgressFish(total = peakamount) - for peaknum, p in enumerate(peaks.T): - perc = str((peaknum*100)//peakamount) - # fish.animate(amount = "", dexextra = 'Partstatus: '+ ' '*(3-len(perc)) +perc + ' % (' + ' '*(4-len(str(peaknum)))+str(peaknum) + '/' + ' ' *(4-len(str(peakamount)))+str(peakamount) + '), Filestatus:') - awc_btime = [] - if len(lastofclass) == 0: # Dict with all classes, containing the heights of the last few peaks - lastofclass[1] = deque() - lastofclassx[1]= deque() - lastofclass[1].append(heights[peaknum]) - lastofclassx[1].append(positions[peaknum]) - classesnearby.append(1) - classesnearbyx.append(-1) - classesnearbypccl.append(pcclasses[peaknum]) - classes[peaknum] = 1 - classamount += 1 - continue - time1 = time.time() - for i, cl in enumerate(classesnearby): - if (positions[peaknum]-classesnearbyx[i]) > maxdistance: - classesnearby.pop(i) - classesnearbyx.pop(i) - classesnearbypccl.pop(i) - lastofclassisis = [] - for i in classesnearby: - # print(i, classesnearby) - lastofclassisis.append(np.median(np.diff(lastofclassx[i]))) - meanisi = np.mean(lastofclassisis) - if 32000 > 20*meanisi> 6000: - maxdistance = 20*meanisi - #print(meanisi, maxdistance , 'maxdistance ----------------------------------------------------------------------------------------------') - - time2 = time.time() - awc_btime.append(time2-time1) #0 - cl = 0 # 'No class' - comperr = 1 - ##print('classesnearby at a peak', classesnearby) - clnrby = np.unique(classesnearby) - time1 = time.time() -# classmean = 0 - # if pcclasses[peaknum] == -1: - # factor = 1.2 - # else: - # factor = 1.6 - - for i in clnrby: - #print('cl: ', i) - # if classesnearbypccl[classesnearby.index(i)] == -1: - # factor = 2.2 - # else: factor = 1.6 - classmean = np.mean(lastofclass[i]) - logerror = np.abs(np.log2(heights[peaknum])-np.log2(classmean)) - abserror = np.abs(heights[peaknum]-classmean) - logthresh = np.log2(factor) - #ä#print(np.std(lastofclass[i])) absthresh = 0.5*classmean # #print('test log', np.abs(np.log2(np.array([0.4,0.5,1,1.5,2,2.4]))-np.log2(np.array([1,1,1,1,1,1]))) ) # abs(classmean*0.5) - #relerror = error - relerror = logerror - relabserror = abserror/thresh - # if 1140 < p.num < 1150: - # print(p.num) - # print('for classes at one peak: classmean, height, abserror, thresh', - # classmean,heights[peaknum], logerror, logthresh) - #print(len(classesnearbypccl), len(classesnearby)) - #print(classmean, heights[peaknum], logerror, logthresh, pcclasses[peaknum], classesnearbypccl[classesnearby.index(i)]) - if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum] or pcclasses[peaknum] == -1:# or - if logerror < logthresh: ## SameClass-Condition - if relerror < comperr and (positions[peaknum]-classesnearbyx[classesnearby.index(i)]) 2*compareisierror: -# cl = holdlastcl - - time2 = time.time() - awc_btime.append(time2-time1) 
#1 - time1 = time.time() - if pcclasses[peaknum] != -1: - if cl != 0 : - #print(cl) - if len(lastofclass[cl]) >= 3: - lastofclass[cl].popleft() - if len(lastofclassx[cl]) >= 3: - lastofclassx[cl].popleft() - lastofclass[cl].append(heights[peaknum]) - lastofclassx[cl].append(positions[peaknum]) - classes[peaknum] = cl - else: # Add new class - cl = classamount+1 - #print('existingclasses: ', classamount) - classamount = cl - - #print('newclass: ----------------------------------------------------------------', cl) - lastofclass[cl] = deque() - lastofclassx[cl] = deque() - lastofclass[cl].append(heights[peaknum]) - lastofclassx[cl].append(positions[peaknum]) - classes[peaknum] = cl - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - ##print('tatsaechlich: ', cl) - if len(classesnearby) >= 12: #kacke implementiert? - minind = classesnearbyx.index(min(classesnearbyx)) - del lastofclass[classesnearby[minind]] - del lastofclassx[classesnearby[minind]] - #print(classesnearby[minind], 'del') - classesnearby.pop(minind) - classesnearbyx.pop(minind) - classesnearbypccl.pop(minind) - # for ind, clnrby in enumerate(reversed(classesnearby)): - # classesnearbyx - # del lastofclass[classesnearby[ind]] - # # del lastofclassx[classesnearby[minind]] - # classesnearby.pop(minind) - # classesnearbyx.pop(minind) - try: - ind=classesnearby.index(cl) - classesnearbyx[ind] = positions[peaknum] - # #print(ind ,' --------------------------------------here -----------------------------') - except ValueError: - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - else: - if cl != 0: - classes[peaknum] = cl - else: - cl = classamount+1 - #print('existingclasses: ', classamount) - classamount = cl - #print('newclass: ', cl) - lastofclass[cl] = deque() - lastofclassx[cl] = deque() - lastofclass[cl].append(heights[peaknum]) - lastofclassx[cl].append(positions[peaknum]) - classes[peaknum] = cl - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - if len(classesnearby) >= 12: #kacke implementiert? 
- minind = classesnearbyx.index(min(classesnearbyx)) - del lastofclass[classesnearby[minind]] - del lastofclassx[classesnearby[minind]] - #print(classesnearby[minind], 'del') - classesnearby.pop(minind) - classesnearbyx.pop(minind) - classesnearbypccl.pop(minind) - # for ind, clnrby in enumerate(reversed(classesnearby)): - # classesnearbyx - # del lastofclass[classesnearby[ind]] - # # del lastofclassx[classesnearby[minind]] - # classesnearby.pop(minind) - # classesnearbyx.pop(minind) - try: - ind=classesnearby.index(cl) - classesnearbyx[ind] = positions[peaknum] - # #print(ind ,' --------------------------------------here -----------------------------') - except ValueError: - classesnearby.append(cl) - classesnearbyx.append(positions[peaknum]) - classesnearbypccl.append(pcclasses[peaknum]) - # #print('classesnearby after a peak', classesnearby) - # for clnum, cls in enumerate(classesnearby): ## deleting almost identical classes (< % difference in amplitude) - # if cls == False: - # continue - # if True: - # continue - # compare = np.mean(lastofclass[cls]) - # for i in classesnearby[clnum:-1]: - # if i== False: - # continue - # if i != cls and abs(compare - np.mean(lastofclass[i])) < compare*0.01: ## - # # #print(compare) - # # #print( np.mean(np.vectorize(lambda peak: peak.height)(lastofclass[i]))) - # clindex = classesnearby.index(cls) - # classesnearby[clindex] = False - # classesnearbyx[clindex] = False - # del lastofclass[cls] - # del lastofclassx[cls] - # # cl = holdlastcl - # # if cl == cls: - # - # - # #print('combinedsomeclasses that were similar', cl, cls) - time2 = time.time() - # awc_btime.append(time2-time1) #2 - # classesnearby = [cls for cls in classesnearby if cls != False] - # classesnearbyx = [clx for clx in classesnearbyx if clx != False] - # - # - #print('awc_btime ', awc_btime , ' newpeak-------------------------------------------------------- :') - peaklist.lastofclass = lastofclass - peaklist.lastofclassx = lastofclassx - peaklist.classesnearby = classesnearby - peaklist.classesnearbyx = classesnearbyx - peaklist.classlist = classes # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) - peaklist.classamount = classamount - peaks = np.append(peaks,classes[None,:], axis = 0) - return peaks, peaklist - -def joincc(peaklist,peaks): - # peaklist = peaks.list - joinedsome = False - classlist = peaks[4] - peaksofclass = {} - last = [] - connect = {} #connect classes in connect+ - classcount = dict.fromkeys(classlist, 0) - ##print(classcount) - #classcount = [0]*len(np.unique(classlist)) - # #print(np.unique(classlist)) - for cl in np.unique(classlist): - peaksofclass[cl]= peaks[:,classlist == cl] - for i in range(len(peaks[0])): # i is the increasing index of the peaks - p = peaks[:,i] - poc = peaksofclass[p[4]] - classcount[p[4]]+=1 - countclass = p[4] #the current class before it might be changed to the connected class - if p[4] in connect: - p[4] = connect[p[4]] #peakclass is changed to connected class - # #print('changed ', countclass, 'to', p.cl) - joinedsome = True - - if len(poc) == classcount[countclass]: #the current peak is last peak of its class - last = poc[-len(poc) if len(poc) <= 5 else 5:] #the last peaks of the class - # #print('last: ', last) - #mean_last = np.mean(np.vectorize(lambda peak: peak[2])(last)) - mean_last = np.mean(last[2,:]) - nextfirst = {} # the first peaks of the next coming class(es) - # #print('class: ', countclass, 'at x = ', p.x, 'mean_last: ', mean_last) - for nexti in range(20): # the next 10 peaks are considered if 
they belong to the same classe - if i + nexti >= len(peaks[0]): break - inextp = peaks[:,i+nexti] - if classcount[inextp[4]] == 0: #current peak is first peak of its class - # #print('found a new begin! its class:' , inextp.cl) - ponc = peaksofclass[inextp[4]] # - nextfirst[inextp[4]] = ponc[0:len(ponc) if len(ponc) <= 5 else 5] - # #print(np.mean(np.vectorize(lambda peak: peak.height)(nextfirst[inextp.cl]))) - # #print(nextfirst) - compare = 1 - c = 0 - nextclass = -1 - for nextcl, first in nextfirst.items(): - mean_nextfirst = np.mean(first[2,:])#np.mean(np.vectorize(lambda peak: peak.height)(first)) - # #print(mean_nextfirst) - error = abs(mean_nextfirst - mean_last)/(mean_nextfirst) - if error < 1: - if compare < error: - continue - compare = error - if nextcl in connect: #if the peak that ist considered belongs to a class, that is already supposed to be connected to the current class - pocc = peaksofclass[connect[nextcl]] #peaks of the currently supposed connected class - if ( abs(mean_nextfirst - np.mean(pocc[-len(pocc) if -len(pocc) <= 5 else 5:][2])) - < abs(mean_nextfirst - mean_last) ): - continue - nextclass = nextcl - if nextclass != -1: - connect[nextclass] = p[4] - # #print('connect ', p.cl , ' and ', nextcl) - for cl in peaklist.classesnearby: - if cl in connect: - # #print('cl, connect', cl, connect[cl]) - peaklist.classesnearby[peaklist.classesnearby.index(cl)] = connect[cl] - peaklist.lastofclass[connect[cl]]=peaklist.lastofclass[cl] - peaklist.lastofclassx[connect[cl]]= peaklist.lastofclassx[cl] - peaklist.classlist = peaks[4] - return joinedsome - # for poc in peaksofclass: - # if len(poc) >= 3: - # newlast = poc[-3:] - # first = poc[:3] - # else: - # newlast = poc[-len(poc):] - # first = poc[:len(poc)] - # if last != []: - # if abs(np.mean(first) - np.mean(last)) < 0: - # #print('oh') - -def discardwaves_refactor(peaks, data): - - deleteclasses = [] - for cl in np.unique(peaks[3]): - peaksofclass = peaks[:,peaks[3] == cl] - isi = np.diff(peaksofclass[0]) - isi_mean = np.mean(isi) - # #print('isismean',isi_mean) - widepeaks = 0 - # #print('width',peaksofclass[2].width) - isi_tenth_area = lambda x, isi:np.arange(np.floor(x-0.1*isi),np.ceil(x+0.1*isi),1, dtype = np.int) - for p in peaksofclass.T: - data = np.array(data) - try: - for dp_around in data[isi_tenth_area(p[0],isi_mean)]:#np.floor(p[0]-0.1*isi_mean), np.ceil(p[0]+0.1*isi_mean),1)]:# - if dp_around <= p[1]-p[2]: - break - except IndexError: - pass - else: - widepeaks+=1 - ## p.isreal_pleateaupeaks() - if widepeaks > len(peaksofclass)*0.5: - deleteclasses.append(cl) - for cl in deleteclasses: - peaks = peaks[:,peaks[3]!=cl] - return peaks - -def smallclassdiscard(peaks, mincl): - classlist = peaks[3] - smallclasses = [cl for cl in np.unique(classlist) if len(classlist[classlist - == cl]) < - mincl] - delete = np.zeros(len(classlist)) - for cl in smallclasses: - delete[classlist == cl] == 1 - peaks = peaks[:,delete != 1] - return peaks - -def makepeak(data_x,cutsize, maxwidth, peakx, ltr, data_ltr, rtr, data_rtr, num, minhlr): - #if len(data) > peakx + cutsize/2: - return Peak(peakx, data_x, maketr(data_ltr, ltr), maketr(data_rtr, rtr), maxwidth, num, minhlr)#data[peakx-cutsize/2:peakx+cutsize/2], num) - #else: - # return Peak(peakx, data[peakx], - # maketr(data, ltr), - # maketr(data, rtr), - # maxwidth, - # #data[peakx-cutsize/2:-1], - # num) - -def maketr(data_x, x): - if x is not None: - return Tr(x,data_x) - else: - return None - -def makepeaklist(pkfirst, data, pk, tr, cutsize, maxwidth): - peaklist = 
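# A hedged sketch of the small-class filter defined above (smallclassdiscard):
# classes with fewer than mincl peaks are dropped.  Note that the original
# marks rows with 'delete[classlist == cl] == 1', a comparison rather than an
# assignment, so its mask is never set; the version below builds the keep
# mask directly from the class counts instead.
import numpy as np

def discard_small_classes(peaks, mincl):
    """Drop all peaks whose class (row 3 of the peaks array) has < mincl members."""
    classlist = peaks[3]
    classes, counts = np.unique(classlist, return_counts=True)
    keep = np.isin(classlist, classes[counts >= mincl])
    return peaks[:, keep]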
np.empty([len(pk)], dtype = Peak) - trtopk = pkfirst - pktotr = 1-pkfirst - trlen = len(tr) - pklen = len(pk) - minhlr = lambda i, mwl, mwr : min( - abs( data[pk[i]] - min( data[pk[i]-mwl:pk[i]] ) if len(data[pk[i]-mwl:pk[i]]) > 0 else 0 ) - , - abs( data[pk[i]]- min( - data[pk[i]:pk[i]+mwr] ) if len(data[pk[i]:pk[i]+mwr]) > 0 else 0 ) - ) - #print(min( data[pk[0]-0:pk[2]]) ) - - if pktotr == 0: - peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], None, None, tr[pktotr], data[pktotr], 0, minhlr(0, 0, maxwidth)) - else: - peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], - tr[-trtopk], - data[-trtopk], tr[pktotr], data[pktotr], - 0, minhlr(0, min(maxwidth, - pk[0]-tr[-trtopk]) , maxwidth)) - for i in range(1,pklen-1): - peaklist[i] = makepeak(data[pk[i]], cutsize, maxwidth, pk[i], tr[i-trtopk], data[tr[i-trtopk]], tr[i+pktotr],data[tr[i+pktotr]], i, minhlr(i, maxwidth, maxwidth)) - if pktotr == 0 and pklen <= trlen: - peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1], data[pklen-trtopk-1], tr[pklen+pktotr-1], data[pklen+pktotr-1], i, minhlr(pklen-1, maxwidth, min(maxwidth, tr[pklen+pktotr-1]-pk[pklen-1]))) - else: - peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1],data[pklen-trtopk-1], None, None, pklen-1, minhlr(pklen-1, maxwidth, 0)) - return peaklist - -#def doublepeaks(peaks, peakwidth): -# dif2 = peaks[1].x-peaks[0].x -# if dif2 > 5* peakwidth: -# peaks[0].real = False -# for i in range(1,len(peaks)-1): -# dif1 = dif2 -# dif2 = peaks[i+1].x-peaks[i].x -# if dif1 > 5* peakwidth and dif2 > 5* peakwidth: -# peaks[i].real = False -# if dif2 > 5* peakwidth: -# peaks[len(peaks)-1] = False -# return peaks - -def discardunrealpeaks(peaklist): - peaks = peaklist[:][np.vectorize(lambda peak: peak.real, otypes=[object])(peaklist) == True] - for i, p in enumerate(peaks): - pass - # p.num = i - return peaks - -def discardnearbypeaks(peaks, peakwidth): - peaksx = xarray(peaks) - pkdiff = np.diff(peaksx) - # peakwidth = avg_peakwidth(pknum,tr) - pknumdel= np.empty(len(peaksx)) - pknumdel.fill(False) -# peaksy = yarray(peaks) - peaksh = heightarray(peaks) - for i,diff in enumerate(pkdiff): - # #print(peaks[i].height) - if diff < peakwidth: #* peaks[i].height: ### Trial Error - if peaksh[i+1] > 1.01 *peaksh[i] : - pknumdel[i] = True - else: - # print(peaksh[i],peaksh[i+1]) - pknumdel[i+1] = True - peaks = peaks[pknumdel!=True] - for i, p in enumerate(peaks): - p.num = i - return peaks - -def interpol(data, kind): - #kind = 'linear' , 'cubic' - width = len(data) - x = np.linspace(0, width-1, num = width, endpoint = True) - return interp1d(x, data[0:width], kind , assume_sorted=True) - -def cutcenter(peak): - p = peak - cut = p.cut - pl=p.distancetoltr - pr=p.distancetortr - if pl is None: - pl = 10 - tx = p.x-10 - else: tx = p.ltr.x - if pr is None: - pr = 10 - if pl < p.maxwidth and pr > 1: - - width=len(cut) - # #print('distancetoltr',pl) - peakshape = cut - interpolfreq = 1 - xnew = np.linspace(0,len(peakshape)-1, len(peakshape)*interpolfreq, endpoint= True) - curvyf = interpol(peakshape) - curvy= curvyf(xnew) - #px = p.cutsize/2 * 4 - #left = px - (5*4) - #plt.plot(xnew, curvy) - #x_0 = optimize.fsolve(curvyf, 1.0) - # f = interp1d(x, y) - # f2 = interp1d(range(width), data[x:x+width], kind='cubic') - ##xnew = np.linspace(0, width-1, num = width*4, endpoint = True) - ##print(xnew) - # plt.plot(xnew,f2(xnew)) - ##print("show") - #plt.show - trx = (p.cutsize/2 - (p.x - tx) ) - if trx >0 : - 
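# The discardnearbypeaks() routine above removes the smaller of any two peaks
# that lie closer together than peakwidth samples.  A simplified sketch of the
# same idea working on plain position/height arrays instead of Peak objects:
import numpy as np

def drop_close_peaks(positions, heights, peakwidth):
    """Keep only the higher of two peaks that are closer than peakwidth."""
    positions = np.asarray(positions, dtype=float)
    heights = np.asarray(heights, dtype=float)
    delete = np.zeros(len(positions), dtype=bool)
    for i, gap in enumerate(np.diff(positions)):
        if gap < peakwidth:
            # as above, the right neighbour needs to be ~1% higher to win
            if heights[i + 1] > 1.01 * heights[i]:
                delete[i] = True
            else:
                delete[i + 1] = True
    return positions[~delete], heights[~delete]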
xstart = trx - else: - xstart = 0 - # #print('pkx: ', p.x, 'ltrx: ', p.ltr.x) - # #print('trx in intpol', x) - x = xstart - if curvyf(x) < 0: - left = 0 - right= 0 - while(x < width-1 and curvyf(x) < 0) : - left = x - # #print(curvyf(x)) - x+=0.25 - right = x - # #print('x: ', x , 'left, right: ', curvyf(left), curvyf(right)) - x = left+(1-curvyf(right)/(curvyf(right)-curvyf(left)))*1/interpolfreq - # #print(x) - else: - x = 0 - # #print(x_int) - # plt.scatter(xstart, curvyf(xstart), marker = 'x', s=150, zorder=2, linewidth=2, color='red') - # plt.scatter(x, curvyf(x), marker='x', s=150, zorder=2, linewidth=2, color='black') - # plt.show - # #print(x_int) - #p.relcutcenter = (p.ltr.x + x_int)-p.x - ##print('cent',p.relcutcenter) - #return (p.ltr.x + x_int)-p.x - - # while(data[x]>0) - else: - x= 0 - - return x - -def relcutarray(peaks): - return np.vectorize(lambda peak: peak.relcutcenter)(peaks) - -def xarray(peaks): - if len(peaks)>0: - peakx = np.vectorize(lambda peak: peak.x)(peaks) - return peakx - else: return [] - -def yarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.y)(peaks) - else: return [] - -def heightarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.height)(peaks) - else: return [] - -def clarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.cl)(peaks) - else: return [] -def pcclarray(peaks): - if len(peaks)>0: - return np.vectorize(lambda peak: peak.pccl)(peaks) - else: return [] - -def peakxarray( ): - peakx = np.empty([len]) - peakx = np.vectorize(lambda peak: peak.x)(peaks) - return peakx - -def peakyarray( ): - peaky= np.empty([len]) - return np.vectorize(lambda peak: peak.y)(peaks) - - -def classify( ): - #template = peaks[0] - meanfit = np.mean(np.vectorize(fit, otypes=[object])(template,peaks)) - for p in peaks: - if fit(template,p) < meanfit: - # #print('classified ', fit(template,p) , ' meanfit: ' , meanfit) - p.currentclass = 1 - -def classifyhiker(template, peaks): - meanfit = np.mean(np.vectorize(fitinterpol2, otypes=[object])(template,peaks)) - #toclassify = peaks.tolist() - firstnot = 0 - for c in range(1,5): - first = True - template = peaks[firstnot] - for i, p in enumerate(peaks[firstnot:]): - if p.currentclass == 0: - if fitinterpol2(template,p) < meanfit: - # #print('peak number ' , i, 'classified as ', c, fit(template,p) , ' meanfit: ' , meanfit) - p.currentclass = c - template = p - elif first == True: - # #print('peak number ' , i, 'classified as First! 
', c, fit(template,p) , ' meanfit: ' , meanfit) - firstnot = i - first = False - else: - None - ##print('peak number ' , i, 'classified as not classified!', fit(template,p) , ' meanfit: ' , meanfit) - return peaks - - - # def Templatefitnext( , number, templnum): - # for p in peaks: - # if fit(peaks[templnum], p) < fitparameter: - -def cut_snippets(data, peaklist, rnge): - snippets = [] - positions = xarray(peaklist) - heights = heightarray(peaklist) - for pos in positions: - snippets.append(data[(pos+rnge[0]):(pos+rnge[1])]) - scaledsnips = np.empty_like(snippets) - for i, snip in enumerate(snippets): - top = -rnge[0] - # plt.plot(snip) - scaledsnips[i] = snip * 1/heights[i] - #plt.plot(scaledsnips[i]) - # print('plted') -# plt.show() - #print('1') - alignedsnips = np.empty((len(snippets), (rnge[1]-rnge[0])*10-30-10)) - standardized = np.empty((len(snippets), (rnge[1]-rnge[0])*10-10)) - intfact = 10 - for i, snip in enumerate(scaledsnips): - if len(snip) < ((rnge[1]-rnge[0])): - if i == 0: - snip =np.concatenate([np.zeros([((rnge[1]-rnge[0]) - len(snip))]),np.array(snip)]) - if i == len(scaledsnips): - snip = np.concatenate([snip, np.zeros([((rnge[1]-rnge[0])-len(snip))])]) - else: - # print('this') - snip = np.zeros([(rnge[1]-rnge[0])]) - interpoled_snip = interpol(snip)(np.arange(0, len(snip)-1, 1/intfact)) if len(snip) > 0 else np.zeros([(rnge[1]-rnge[0]-1)*intfact ]) #interpolfactor 10 - - intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) - if intsnipheight == 0: - intsnipheight = 1 - interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight - standardized[i] = interpoled_snip - #print('2') - mean = np.mean(standardized, axis = 0) - #plt.plot(mean) -# plt.show() - #plt.plot(mean[10*-rnge[0]-10*5:-10*rnge[1]+21]) -# plt.show() - meantop = np.argmax(mean) - for i, snip in enumerate(standardized): - #plt.show() - interpoled_snip = snip #standardized[i] - cc = crosscorrelation(interpoled_snip[15:-15], mean) - #cc = crosscorrelation(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31], mean[10*-rnge[0]-10*7:-10*rnge[1]+31]) - #plt.plot(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31]) - #top = np.argmax(interpoled_snip) - #offset = meantop - top - #if not(-15 <= offset <= 15): offset = 0 - offset = -15 + np.argmax(cc) - interpoled_snip = interpoled_snip[15-offset:-15-offset] if offset != -15 else interpoled_snip[30:] - #print(offset) - #plt.plot(interpoled_snip) - if len(interpoled_snip[~np.isnan(interpoled_snip)])>0: - alignedsnips[i] = interpoled_snip - #plt.show() - # print('3') - return snippets, alignedsnips - - - -def fit(templ, peak): - fit = np.sum(np.square(templ.cut - peak.cut)) - return fit - -def fitinterpol2(templ,peak): - t = templ - p = peak - if p.real and t.real: - fit = np.sum(np.square(t.cutaligned-p.cutaligned)) - else: - fit = 0 - return fit - - - -def fitinterpol( templ, peak): - t = templ - p = peak - if p.real: - centerp = cutcenter(p) - centert = cutcenter(t) - shiftp = centerp-p.cutsize/2 - shiftt = centert-t.cutsize/2 - - if shiftp > -5: - shiftp = min(5, 5+centerp-p.cutsize/2) - else: shiftp = 0 - - if shiftt > -5: - shiftt = min(5, 5+centert-t.cutsize/2) - else: shiftt = 0 - - xnew = np.linspace(0,p.cutsize-11, (p.cutsize-1) * 4,endpoint = True) - #peak_interpoled = interpol(p.cut)(xnew) - #plt.plot(xnew, interpol(p.cut)(xnew+shift)) - # #print(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp)) - fit = np.sum(np.square(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp))) - else: - fit = 
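# cut_snippets() above cuts a window around every peak, scales it to unit
# height, resamples it ten-fold and then aligns each snippet to the mean
# snippet by cross-correlation before the PCA step.  A compact sketch of just
# the alignment stage, assuming equally long, already normalized snippets and
# the same +-15 sample shift range as above:
import numpy as np
from scipy import signal

def align_to_mean(snippets, max_shift=15):
    """Shift every snippet to the lag that best matches the mean snippet."""
    snippets = np.asarray(snippets, dtype=float)
    mean = snippets.mean(axis=0)
    width = snippets.shape[1] - 2 * max_shift
    aligned = np.empty((len(snippets), width))
    for i, snip in enumerate(snippets):
        # valid-mode cross-correlation of the trimmed snippet with the mean
        cc = signal.fftconvolve(mean, snip[max_shift:-max_shift][::-1], mode='valid')
        offset = np.argmax(cc) - max_shift
        aligned[i] = snip[max_shift - offset: max_shift - offset + width]
    return aligned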
0 - return fit - - -def plotdata(peaks, data): - x = xarray(peaks) - y = yarray(peaks) - plt.plot(range(len(data)),data) - plt.plot(x, y, '.r', ms=20) - #for p in peaks: - # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) - # plt.plot(tr, data[tr], '.g', ms=20) - plt.show() - - -def plotdatabyx(peaksx, data): - x = peaksx - y = data[peaksx] - plt.plot(range(len(data)),data) - plt.plot(x, y, '.r', ms=20) - plt.show() - #for p in peaks: - # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) - # plt.plot(tr, data[tr], '.g', ms=20) - -def plotpeak(peaks): - #plt.plot(peaks), cutpeaks) #bei betrachtung aller blocks zu groß! - for p in peaks: - plt.plot(range(p.cutsize),p.cut) - #plt.plot(pk, x[pk] , '.r', ms=20) - plt.show() - - -def periodicinclass(peaks, cl): - noiselist = [] - classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaks) - peaks = xarray(peaks) - peaks = peaks[:][classlist == cl] - periodic = [] - periodiccollector = [] - error2 = [] - isperiodic = True - b=1 - c=2 - ctofar = False - compdif = 0 - dif = 0 - count = 1 - foundtriple = False - next = 0 - for i in range(len(peaks)-1): - if i != next: continue - # #print(i, 'foundtriple', foundtriple) - error2 = [] - b=1 - c=0 - A = peaks[i] - B = peaks[i+b] - compdif = dif - while foundtriple == True and count <= 3 and i+1 < len(peaks)-1: - while B-A < compdif*1.5 and i+b+1 < len(peaks)-1: - # #print('newdif: ', B-A, 'olddif:' , dif) - if abs((B-A) - compdif) < compdif*0.4: - error2.append(abs((B-A) - dif)) - b+=1 - B = peaks[i+b] - if len(error2) > 0: - bestB = error2.index(min(error2)) - B = peaks[i+1 + bestB] - periodic.append(B) - dif = 0.5*(dif + (B-A)) - # #print('match found') - b = 1+bestB - break - else: - count+=1 - compdif = dif*count - else: - if foundtriple == True: - # #print('no further match found, ') - isperiodic = False - - - - - while foundtriple == False and i+c< len(peaks)-1: - while i+c < len(peaks)-1: - A = peaks[i] - B = peaks[i+b] - C = peaks[i+c] - dif1 = B - A - dif2 = C - B - if (C-B > (B-A)*1.5): - break - if abs(dif1 - dif2) < dif1*0.4: - error2.append(abs(dif1-dif2)) - c +=1 - #C = peaks[i+c] # C weiterlaufenlassen, bis zu weit - else: - if len(error2) == 0: - # #print('no triple found') - isperiodic = False - if len(error2) > 0: - bestC = error2.index(min(error2)) - C = peaks[i+2 + bestC] - c = 2+ bestC - periodic.extend((A,B,C)) - dif1 = B - A - dif2 = C - B - # #print('dif1: ', dif1, 'dif2: ', dif2) - dif = 0.5*(dif2+dif1) - foundtriple = True - # #print('triple found', i+c, 'dif : ', dif) - else: - error2 = [] # B weiterlaufen lassen, C reset auf B+1 - b +=1 - c = b+1 - - if isperiodic == False: - if len(periodic) > 3: - periodiccollector.append(periodic) - isperiodic = True - periodic = [] - if c!=0: - next = i+c - else: - next = i+b - if len(periodiccollector) > 0: - # for i in range(len(periodiccollector)): - # #print('collector ', i, periodiccollector[i]) - return periodiccollector - else: - #print('no periodicity found') - return [] - - - -def noisediscard(peaklist, tsh_n, ultimate_threshold): - detected_noise = False - ##print('noisetsh: ', tsh_n) - for p in peaklist.list: - - if p.height < tsh_n or p.height < ultimate_threshold: - p.noise = True - detected_noise = True - peaklist.list = peaklist.list[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaklist.list) == False] - # #print(peaks) - # for cl in classlist: - # diff = np.vectorize(lambda peak: peak.x, otypes=[object])(peaks[:][classlist == cl]) - # 
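# periodicinclass() above looks for (quasi-)periodic runs within one class: it
# first searches for a triple A, B, C whose two spacings agree to within 40%
# and then extends the run peak by peak with the estimated period.  A much
# reduced sketch of the triple test alone (consecutive peaks only):
import numpy as np

def find_periodic_triples(times, rel_tol=0.4):
    """Return index triples (i, i+1, i+2) whose two spacings differ by < rel_tol*d1."""
    times = np.asarray(times, dtype=float)
    triples = []
    for i in range(len(times) - 2):
        d1 = times[i + 1] - times[i]
        d2 = times[i + 2] - times[i + 1]
        if abs(d1 - d2) < rel_tol * d1:
            triples.append((i, i + 1, i + 2))
    return triples

# Example: a regular 10-sample rhythm with one outlier at 47.
print(find_periodic_triples([0, 10, 20, 30, 47, 50]))   # -> [(0, 1, 2), (1, 2, 3)]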
meandiff = np.mean(diff) - # msecompare = np.mean(np.square(diff-(diff*0.8))) - # mse = np.mean(np.square(diff-meandiff)) - # if mse > msecompare: - # noiselist.append(cl) - # for p in peaks: - #if p.cl in noiselist: - # if p.height < 0.1: - # p.noise = True - # peaks = peaks[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaks) == False] - # return peaks - return detected_noise - - -def plotPCclasses(peaks, data): - plt.plot(range(len(data)),data, color = 'black') - classlist = np.vectorize(lambda peak: peak.pccl, otypes=[object])(peaks) - cmap = plt.get_cmap('jet') - colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) - np.random.seed(22) - np.random.shuffle(colors) - colors = [colors[cl] for cl in np.unique(classlist)] - print('classlist', np.unique(classlist)) - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) - # x=0 -# if len(classlist)>0: - # #print(classlist) - # #print('classes: ' , np.unique(classlist)) - #from collections import Counter - #count = Counter(classlist) - # #print('longest class: ', count.most_common()[0]) - for num, color in zip(np.unique(classlist), colors): - if num == -1 : - color = 'black' - peaksofclass = peaks[:][classlist == num] - #xpred = linreg_pattern(peaksofclass[0:3]) - #for p in peaksofclass[0:3]: - # #print(p.x) - ##print(xpred, peaksofclass[3].x) - - #if len(peaksofclass) > 1000: - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) - #else: - print(num) - plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = color, ms =20) - plt.scatter(xarray(peaksofclass), heightarray(peaksofclass)) - # for p in peaks: - # plt.text(p.x, p.y, p.num) - #plt.show() - - # plt.show() - plt.close() - -def plotampwalkclasses_refactored(peaks, data): - plt.plot(range(len(data)),data, color = 'black') - classlist = np.array(peaks[3],dtype=np.int) - cmap = plt.get_cmap('jet') - colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) - np.random.seed(22) - np.random.shuffle(colors) - colors = [colors[cl] for cl in np.unique(classlist)] - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) - # x=0 -# if len(classlist)>0: - # #print(classlist) - # #print('classes: ' , np.unique(classlist)) - #from collections import Counter - #count = Counter(classlist) - # #print('longest class: ', count.most_common()[0]) - for cl, color in zip(np.unique(classlist), colors): - peaksofclass = peaks[:,classlist == cl] - #xpred = linreg_pattern(peaksofclass[0:3]) - #for p in peaksofclass[0:3]: - # #print(p.x) - ##print(xpred, peaksofclass[3].x) - - #if len(peaksofclass) > 1000: - # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) - #else: - - plt.plot(peaksofclass[0],peaksofclass[1], '.', color = color, ms =20) - plt.scatter(peaksofclass[0], peaksofclass[2]) - # for p in peaks: - # plt.text(p.x, p.y, p.num) - plt.show() - - # plt.show() - plt.close() - - -def crosscorrelation(sig, data): - autocorr = signal.fftconvolve(data, sig[::-1], mode='valid') - return autocorr - -def plottemplatefits(data, peaks, tr, templnum): - # - plotdata(peaks, data, tr) - plt.plot(range(len(data)),data) - classes = np.vectorize(lambda peak: peak.currentclass, otypes=[object])(peaks) - class1 = peaks[:][classes == 1 ] - if len(class1) > 0: - plt.plot(xarray(class1), yarray(class1), '.r', ms=20) - class2 = peaks[:][classes == 2 ] - if len(class2) > 0: - plt.plot(xarray(class2), yarray(class2), '.g', ms=20) - class3 = peaks[:][classes == 3 ] - if len(class3) > 0: - 
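# crosscorrelation() above computes a valid-mode cross-correlation by FFT
# convolution of the data with the reversed template; for real signals this is
# the same as np.correlate(data, sig, 'valid'), and the lag of the maximum is
# what the snippet alignment uses as its offset.  A quick check with arbitrary
# numbers:
import numpy as np
from scipy import signal

data = np.array([0.0, 1.0, 3.0, 1.0, 0.0, 2.0])
sig = np.array([1.0, 2.0, 0.5])

fft_cc = signal.fftconvolve(data, sig[::-1], mode='valid')
direct_cc = np.correlate(data, sig, mode='valid')
print(np.allclose(fft_cc, direct_cc))   # -> True
print(np.argmax(fft_cc))                # -> 1, the template fits data[1:4] best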
plt.plot(xarray(class3), yarray(class3), '.c', ms=20) - class4 = peaks[:][classes == 4 ] - if len(class4) > 0: - plt.plot(xarray(class4), yarray(class4), '.y', ms=20) - - # for p in peaks: # <-- - # plt.text(p.x , p.y, p.num) - - # plt.plot(tr, data[tr], '.g', ms=20) - plt.show() - -def linreg_pattern(peaks): - from sklearn import datasets, linear_model - from sklearn.metrics import mean_squared_error, r2_score - - peaksx = xarray(peaks) - peaksx = peaksx.reshape(-1,1) - #peaksh = heightarray(peaks) - #peakx = peak.x - # Create linear regression object - regr = linear_model.LinearRegression() - numbers = np.arange(len(peaks)).reshape(-1,1) - # Train the model using the training sets - regr.fit(numbers, peaksx) - - # Make predictions using the testing set - peakx_pred = regr.predict(len(peaks)) - # # The coefficients - # #print('Coefficients: \n', regr.coef_) - # # The mean squared error - # #print("Mean squared error: %.2f" - # % mean_squared_error(diabetes_y_test, diabetes_y_pred)) - # # Explained variance score: 1 is perfect prediction - # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) - - - # Plot outputs - #plt.scatter(peaksx, peaksh, color='black') - #plt.scatter(peakx, peakh_pred, color='blue') - - #plt.xticks(()) - #plt.yticks(()) - - # plt.show() - - return peakx_pred - -def linreg(peaks, peak): - from sklearn import datasets, linear_model - from sklearn.metrics import mean_squared_error, r2_score - - peaksx = xarray(peaks) - peaksx = peaksx.reshape(-1,1) - peaksh = heightarray(peaks) - peakx = peak.x - # Create linear regression object - regr = linear_model.LinearRegression() - - # Train the model using the training sets - regr.fit(peaksx, peaksh) - - # Make predictions using the testing set - peakh_pred = regr.predict(peakx) - - # # The coefficients - # #print('Coefficients: \n', regr.coef_) - # # The mean squared error - # #print("Mean squared error: %.2f" - # % mean_squared_error(diabetes_y_test, diabetes_y_pred)) - # # Explained variance score: 1 is perfect prediction - # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) - - - # Plot outputs - #plt.scatter(peaksx, peaksh, color='black') - #plt.scatter(peakx, peakh_pred, color='blue') - - #plt.xticks(()) - #plt.yticks(()) - - # plt.show() - - - - return peakh_pred - -def wp_transform(x): - import pywt - wp = pywt.WaveletPacket(data=x, wavelet='haar', mode='symmetric') - print('maxlevel: ', wp[''].maxlevel) - return (np.array([node.data for node in wp.get_level(wp[''].maxlevel, 'freq')])).flatten() - -def wpfeats(snips): - size = len(wp_transform(snips[0])) - wp = np.empty([len(snips), size]) - for i, snip in enumerate(snips): - print(wp_transform(snip)) - wp[i] = (wp_transform(snip)) - #wp = wp.T - print(wp[0]) - wpcoef = wp.T - print(wp[0]) - from sklearn.preprocessing import StandardScaler - wpcoef = StandardScaler().fit_transform(wpcoef) - coeffvalues = [] - for coeff in wpcoef: - stat, crit, sig = stats.anderson(coeff, dist = 'norm') - # coeffvalues.append(stat) - coeffvalues.append(np.sum(np.abs(coeff))) - coeffvalues = np.array(coeffvalues) - coeffs = np.argsort(coeffvalues)[::-1][:10] - print(coeffvalues[coeffs]) - return wp.T[coeffs] - - - - -def pc(cutsnippets, peaklist): - # (observations, features) matrix - M = np.empty([len(cutsnippets), len(cutsnippets[0])]) - for i, snip in enumerate(cutsnippets): - M[i] = snip[:] - from sklearn.preprocessing import StandardScaler - StandardScaler().fit_transform(M) - # #print(M.shape, ' Mshape') - # singular value decomposition 
factorises your data matrix such that: - # - # M = U*S*V.T (where '*' is matrix multiplication) - # - # * U and V are the singular matrices, containing orthogonal vectors of - # unit length in their rows and columns respectively. - # - # * S is a diagonal matrix containing the singular values of M - these - # values squared divided by the number of observations will give the - # variance explained by each PC. - # - # * if M is considered to be an (observations, features) matrix, the PCs - # themselves would correspond to the rows of S^(1/2)*V.T. if M is - # (features, observations) then the PCs would be the columns of - # U*S^(1/2). - # - # * since U and V both contain orthonormal vectors, U*V.T is equivalent - # to a whitened version of M. - - U, s, Vt = np.linalg.svd(M, full_matrices=False) - V = Vt.T - - # PCs are already sorted by descending order - # of the singular values (i.e. by the - # proportion of total variance they explain) - S = np.diag(s) - # PC = (s*V) - # PCs: - #print(U.shape) - #print(S.shape) - #print(V.shape) - #print(s[0], U[0,:]) - - #PC1 = (s[0] * U[:,0]) - #PC2 = (s[1] * U[:,1]) - #for i, p in enumerate(peaklist): - # p.pc1 = PC1[i] - # p.pc2 = PC2[i] - - #mu = peaks.mean(axis=0) - #fig, ax = plt.subplots() - #ax.scatter(xData, yData) - #for axis in U: - # start, end = mu, mu + sigma * axis - # ax.annotate( - # '', xy=end, xycoords='data', - # xytext=start, textcoords='data', - # arrowprops=dict(facecolor='red', width=2.0)) - #ax.set_aspect('equal') - #plt.show() - - - # if plot_steps: - # plt.scatter(PC1, PC2) - # plt.show() - - # PCData1 = (U[:,0]*M) - # PCData2 = (U[:,1]*M) - # plt.scatter(PCData1, PCData2) - # plt.show() - - #plt.scatter(U[:,0],U[:,1]) - #plt.show() - #print('done') - #return PC - - # if we use all of the PCs we can reconstruct the noisy signal perfectly - #Mhat = np.dot(U, np.dot(S, V.T)) - #print('Using all PCs, MSE = %.6G' %(np.mean((M - Mhat)**2))) - - #plt.show() - return S@U.T - -def gettime(x, samplerate, starttime): - startm = int(starttime[-2:]) - starth = int(starttime[:-2]) - seconds = x/samplerate - m, s = divmod(seconds, 60) - m = m + startm - h, m = divmod(m, 60) - h = h+starth - return "%d:%02d:%02d" % (h, m, s) - -def connect_blocks(oldblock): - newblock = Peaklist([]) - newblock.lastofclass = oldblock.lastofclass - newblock.lastofclassx = oldblock.lastofclassx - newblock.classesnearby = oldblock.classesnearby - newblock.classesnearbypccl = oldblock.classesnearbypccl - newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx] - newblock.classamount = oldblock.classamount - return newblock - ##print('classesnearbyx! 
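# pc() above implements PCA exactly as its comment block describes: the
# (observations x features) snippet matrix is decomposed as M = U*S*V.T and
# the returned S @ U.T holds the principal-component scores, one column per
# snippet.  A standalone sketch (here the standardized matrix is assigned back
# before the SVD, which the call above omits):
import numpy as np
from sklearn.preprocessing import StandardScaler

def pc_scores(snippets, n_components=2):
    """First PCA scores of an (observations x features) matrix via SVD."""
    M = StandardScaler().fit_transform(np.asarray(snippets, dtype=float))
    U, s, Vt = np.linalg.svd(M, full_matrices=False)
    # singular values come out sorted, so the leading rows are the leading PCs
    return (np.diag(s) @ U.T)[:n_components]

# Example: 20 snippets of 50 samples give a (2, 20) score matrix that can be
# handed to the clustering step.
rng = np.random.RandomState(0)
print(pc_scores(rng.randn(20, 50)).shape)   # -> (2, 20)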
old, new ' , oldblock_len,oldblock.classesnearbyx , newblock.classesnearbyx) - -if __name__ == '__main__': - main() - - - -# deleted Code, but unsure if really want to delete: - - #nix #print( b.data_arrays) - - # for cl in np.unique(cllist): - - # currentfish_x = x[:][cllist == cl] - # currentfish_y = y[:][cllist == cl] - # currentfish_h = x[:][cllist == cl] - - - #nix try: - #nix xpositions[cl] = b.create_data_array("f%d_eods" %cl, "spiketimes", data = currentfish_x) - #nix xpositions[cl].append_set_dimension() - #nix # thisfish_eods = b.create_multi_tag("f%d_eods_x"%cl, "eods.position", xpositions[cl]) - #nix # thisfish_eods.references.append(nixdata) - #nix except nix.pycore.exceptions.exceptions.DuplicateName: - #nix - #nix xpositions[cl].append(currentfish_x) - - - #thisfish_eods.create_feature(y, nix.LinkType.Indexed) - #b.create_multi_tag("f%d_eods_y"%cl, "eods.y", positions = y) - #b.create_multi_tag("f%d_eods_h"%cl, "eods.amplitude", positions = h) - #thisfish_eods.create_feature - - - - -# in analyseEods -# in analyseEods classlist = eods[3] #np.vectorize(lambda peak: peak.cl, otypes=[object])(worldpeaks.list) -# in analyseEods fishclass = {} -# in analyseEods #print('classlist: ', classlist) -# in analyseEods # #print('Classes at end: ', np.unique(classlist)) -# in analyseEods -# in analyseEods -# in analyseEods fishes = {} -# in analyseEods for num in np.unique(classlist): -# in analyseEods fishes[num] = eods[:,:][: , classlist == num] -# in analyseEods -# in analyseEods -# in analyseEods -# in analyseEods -# in analyseEods fishes = fill_hidden_3(fishes) # cl-dict : x y z -dict -# in analyseEods #maxlencl = max(fishes, key=lambda k: fishes[k]['x'][-1]-fishes[k]['x'][0]) -# in analyseEods -# in analyseEods fishes, weirdparts = fill_holes(fishes) -# in analyseEods fishes, weirdparts = fill_holes(fishes) -# in analyseEods -# in analyseEods for cl in np.unique(classlist): -# in analyseEods isi = [isi for isi in np.diff(fishes[cl]['x'])] -# in analyseEods fishes[cl][3]= isi -# in analyseEods - - -#npFish -#npFish npFishes = {} -#npFish fishfeaturecount = len(fishes[cl]) -#npFish for cl in np.unique(classlist): -#npFish npFishes[cl]= np.zeros([fishfeaturecount, len(fishes[cl]['x'])]) -#npFish for i, feature in enumerate(['x', 'y', 'h', 'isi']): #enumerate(fishes[cl]): -#npFish if feature == 'isi': -#npFish fishes[cl][feature].append(fishes[cl][feature][-1]) -#npFish # #print(feature, cl) -#npFish npFishes[cl][i] = np.array(fishes[cl][feature]) -#npFish # #print(npFishes[classlist[0]][0]) -#npFish # #print(npFishes[classlist[0]][2]) -#npFish # #print(npFishes[classlist[0]][3]) -#npFish #np.savetxt('worldpeaks_x_y_cl_2', (x,y,cl, isi), fmt="%s") -#npFish -#npFish np.set_printoptions(threshold=np.nan) -#npFish -#npFish for i, cl in enumerate(np.unique(classlist)): #Neue Klassennamen! -#npFish x = npFishes[cl][0] -#npFish y = npFishes[cl][1] -#npFish h = npFishes[cl][2] -#npFish isi = npFishes[cl][3] -#npFish -#npFish np.savetxt(filename[:-4]+'Fish_xyhisi_cl%d' % i, npFishes[cl], fmt="%s") -#npFish -#npFish -#npFish - - - - - - # / TODO: Peakclassifikator bei weit wegliegenden klassen? Done - # / TODO: Class2 implementation auf class linreg übertragen Done - Doof - # TODO: Klassen zusammenfuegen/ Noise zusammenfuegen - # - Wenn last 3 und first 3 zueinander passen in 1. Amplitude und 2. Periode (falls peaks) oder 2. randomzeugs? - Noiseerkennung und 2. 
Amplitude - # TODO: Klassen filtern auf Patternausreißer - # diff --git a/thunderfish/DextersThunderfishAddition/analyzeEods.py b/thunderfish/DextersThunderfishAddition/analyzeEods.py deleted file mode 100644 index f75339d5..00000000 --- a/thunderfish/DextersThunderfishAddition/analyzeEods.py +++ /dev/null @@ -1,1104 +0,0 @@ -import sys -import numpy as np -import copy -from scipy.stats import gmean -from scipy import signal -from scipy import optimize -import matplotlib.pyplot as plt -import matplotlib.colors as mplcolors -from thunderfish.dataloader import open_data -from thunderfish.peakdetection import detect_peaks -from scipy.interpolate import interp1d -from scipy.signal import savgol_filter -from collections import deque -import nixio as nix -import time -import os -import pickle - -deltat = 60.0 # seconds of buffer size -thresh = 0.05 -mind = 0.1 # minimum distance between peaks -peakwidththresh = 30 # maximum distance between max(peak) and min(trough) of a peak, in datapoints -new = 0 - -def main(): ############################################################# Get arguments eodsfilepath, plot, (opt)save, (opt)new - - filepath = sys.argv[1] - sys.argv = sys.argv[1:] - - plot = 0 - save = 0 - print(sys.argv) - if len(sys.argv)==2: - plot = int(sys.argv[1]) - print(plot) - if len(sys.argv)==3: - plot = int(sys.argv[1]) - save = int(sys.argv[2]) - print('saving results: ', save) - import ntpath - if len(sys.argv)==4: - plot = int(sys.argv[1]) - save = int(sys.argv[2]) - new = int(sys.argv[3]) - print('saving results: ', save) - ntpath.basename("a/b/c") - def path_leaf(path): - head, tail = ntpath.split(path) - return tail or ntpath.basename(head) - filename = path_leaf(filepath) - prefixlen = filename.find('_')+1 - starttime = "2000" - home = os.path.expanduser('~') - path = filename[prefixlen:-4]+"/" - os.chdir(home+'/'+path) # operating in directory home/audiofilename/ - - # if os.path.exists(filename[prefixlen:-4]+'_AmpFreq4.pdf'): - # new = 0 - - with open_data(filename[prefixlen:-4]+".WAV", 0, 60, 0.0, 0.0) as data: - samplerate = data.samplerate - datalen = len(data) - - ############################################################# Fileimport and analyze; or skip, if analyzed data already exists - if new == 1 or not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes.npz"): - print('new analyse') - eods = np.load(filename, mmap_mode='c') - - # time1 = 40000 - # time2 = 45000 - # time1x = time1 * samplerate - # time2x = time2 * samplerate - # startpeak = np.where(((eods[0]>time1x)&(eods[0]= 100 and i % (classamount//100) == 0: - print(i) - fishclass = eods[:,:][: , classlist == num] - fish = [] - if len(fishclass[0]) < 12: - continue - for i , feature in enumerate(fishclass): - if i != 3: - fish.append(feature) -# print('fish - printing to check structure', fish) - temp_classisi = np.diff(fishclass[0]) - #print(temp_classisi) - #print('plot smooth vs orig', len(temp_classisi)) - binlen=10 - # temp_classisi_medians = temp_classisi#bin_median(temp_classisi, 1) - # smoothed = savgol_filter(temp_classisi_medians,11,1) - # diff = np.square(smoothed-temp_classisi_medians) - # data = np.array(diff) - # result = np.median(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) - # result2 = bin_percentilediff(temp_classisi, 20) - # if len(result) > 7 and len(result2) > 7: - # smoothedresult = savgol_filter(result, 7, 1) - # smoothedresult2 = savgol_filter(result2, 7, 1) - # else: - # smoothedresult = result - # smoothedresult2 = result2 - # 
#plt.plot(np.arange(0,len(result)*binlen, binlen),result) - # #plt.plot(smoothed) - # #plt.plot(np.arange(0,len(result2)*20, 20), smoothedresult2) - # #plt.plot(np.arange(0,len(result2)*20, 20), result2) - # # plt.plot(temp_classisi_medians) - # #plt.plot(np.arange(0, len(smoothedresult)*binlen, binlen),smoothedresult) - # noiseindice = np.where(smoothedresult > 100000) - # #print(noiseindice) - # noiseindice = np.multiply(noiseindice, binlen) - # #print(noiseindice) - # noiseindice = [x for i in noiseindice[0] for x in range(i, i+10)] - # print(np.diff(noiseindice)) - # noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) - # #print(noiseindice) - # noiseindice = [x for arr in noiseindice if len(arr) > 20 for x in arr[50:-51]] - # noiseindice= np.array(noiseindice) - # #print(noiseindice) - # fish = np.array(fish) - # # Noise delete applial - # # if len(noiseindice) >0 : - # # fish[:,noiseindice] = np.nan #np.setdiff1d(np.arange(0, len(fish[0]),1),(noiseindice))] = np.nan - # fish = list(fish) - # #plt.plot(temp_classisi) - # plt.show() - binlen = 60 - #print(len(fish[0])) - if discardcondition1(fish) == False: # condition length < 10 - # if False: - mean, std, d2, d8 = bin_array_mean(temp_classisi,binlen) - # print('mean, std, d2, d8', mean, std, d2, d8) - count = ((mean * 4 >= d8) * (d2 >= mean * 0.25)) .sum() # condition_2 : if 0.2, and 0.8 deciles of the ISI of ONE SECOND/binlen are in the area of the median by a factor of 2, then the class seems to have not too much variability. - # Problem: Case, Frequency changes rapidly during one second/binlen , then the 0.8 or 0.2 will be out of the area... - # But then there is one wrong estimation, not too much of a problem - #print('fish') - # if count >= 0.5*(len(temp_classisi)//binlen +1): - if True: - fishes.append(fish) - #print('len fishes after append', len(fishes)) - #print('printing fishes to check structure', fishes[0][0]) - # ontimes = np.load('ontime'+filename[prefixlen:-4]+'.npz') - # ontime = [] - # # for c, items in enumerate(ontimes.items()): - # # ontime.append(items[1]) - # ontime.append(ontimes['on']) - # ontime.append(ontimes['near']) - # ontime.append(ontimes['far']) - # - # if plot == 1: - # plot_ontimes(ontime) - - #print(eods[0][-1]//samplerate, len(ontime[0])) - if fishes is not None: - - #for fish in fishes: - # fish[0] - - # improving the fishpeak-data by adding peaks at places where theses peaks are hidden behind other (stronger)peaks - #fishes = fill_hidden_3(fishes, eods, filename) # cl-dict : x y z -dict - # filling holes or removing unexpected peaks from the class which are most likely caused by false classification - #fishes, weirdparts = fill_holes(fishes) - #fishes, weirdparts = fill_holes(fishes) - - if fishes is not None: - if len(fishes) > 0: - for cl, fish in enumerate(fishes): - ### Filter to only get ontimes close and nearby - for i, x in enumerate(fish[0]): - print(x) - #if x//samplerate < len(ontime[0]): -# # print(ontime[1][x//samplerate], ontime[0][x//samplerate]) - # if ontime[0][x//samplerate] != 1 and ontime[1][x//samplerate] != 1 and ontime[2][x//samplerate] != 1: - # for feat_i, feature in enumerate(fish): - # fishes[cl][feat_i][i] = np.nan - # print(x//samplerate, ' ignored') - isi = [isi for isi in np.diff(fishes[cl][0])] - isi.append(isi[-1]) - fishes[cl].append(isi) - #fishes[i] # the structure of the array fishes - # 0 x - # 1 y - # 2 h - # 3 isi - npFishes = fishes - - - # fishfeaturecount = 
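# The acceptance test above bins each class's inter-spike intervals into
# chunks of binlen values and checks whether the 20th and 80th percentile of
# every chunk stay within a factor of four of the chunk median; a class would
# be kept if enough chunks pass (the check is currently bypassed by the
# 'if True:').  A hedged sketch of that intended criterion:
import numpy as np

def isi_is_stable(isis, binlen=60, spread=4.0, min_fraction=0.5):
    """True if the ISI sequence is regular enough to be a plausible fish."""
    isis = np.asarray(isis, dtype=float)
    nbins = max(1, int(np.ceil(len(isis) / binlen)))
    passed = 0
    for i in range(nbins):
        chunk = isis[i * binlen:(i + 1) * binlen]
        med = np.median(chunk)
        d2, d8 = np.percentile(chunk, [20, 80])
        if d8 <= spread * med and d2 >= med / spread:
            passed += 1
    return passed >= min_fraction * nbins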
len(fishes[cl]) - # for cl in range(len(np.unique(classlist))-1): - # - # fishlen = len(fishes[cl][0]) - # npFishes[cl]= np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ ".npmmp", dtype='float32', mode='w+', shape=(fishfeaturecount, fishlen), order = 'F') - # np.zeros([fishfeaturecount, len(fishes[cl]['x'])]) - # for i, feature in enumerate(['x', 'y', 'h', 'isi']): #enumerate(fishes[cl]): - # if feature == 'isi': - # fishes[cl][feature].append(fishes[cl][feature][-1]) - # npFishes[cl][i] = np.array(fishes[cl][feature]) - # - -# np.set_printoptions(threshold=np.nan) - # - if save == 1 and not os.path.exists('classes/'): - os.makedirs('classes/') - - #np.save('classes/'+ filename[prefixlen:-4]+"_class%d"%i, fish) - #print('this', len(npFishes)) - if save == 1: - with open('classes/'+ filename[prefixlen:-4]+"_classes.lst", "wb") as fp: #Pickling - pickle.dump(npFishes, fp) - #np.savez('classes/'+ filename[prefixlen:-4]+"_classes", npFishes) - else: - npFishes = [] - try: - with open('classes/'+ filename[prefixlen:-4]+"_classes.lst", "rb") as fp: #Pickling - npFishes = pickle.load(fp) - # npFishload=np.load('classes/'+ filename[prefixlen:-4]+"_classes.npz") - print('loaded classes') - except: - print('no classes found') - # for fishes in npFishload.files: - # print('loaded ', fishes) - # for fish in npFishload[fishes]: - # fishtemp = np.zeros([4,len(fish[0])]) - # for i, fishfeature in enumerate(fish): - # fishtemp[i] = fishfeature - # npFishes.append(fishtemp) - #print('npFishes to check structure', npFishes[0][0][0]) -# if not os.path.exists('classes/'): -# os.makedirs('classes/') -# if not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes_red"): -#np.save('classes/'+ filename[prefixlen:-4]+"_class%d"%i, fish) - if new == 1 or not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes_red.lst"): -# reducednpFishes = npFishes - reducednpFishes = reduce_classes(npFishes)# reducing classes by putting not overlapping classes together - #print('reduced') - if save == 1: - with open('classes/'+ filename[prefixlen:-4]+"_classes_red.lst", "wb") as fp: #Pickling - pickle.dump(reducednpFishes, fp) - #np.savez('classes/'+ filename[prefixlen:-4]+"_classes_red.npz", reducednpFishes) - else: - with open('classes/'+ filename[prefixlen:-4]+"_classes_red.lst", "rb") as fp: #Pickling - reducednpFishes = pickle.load(fp) - #print('len reduced ', len(reducednpFishes)) - if len(reducednpFishes) == 0: - print('no on-/ or nearbytimeclass with sufficient length or good enough data. 
quitting') - quit() -# reducednpFishload=np.load('classes/'+ filename[prefixlen:-4]+"_classes_red.npz") -# -# for fishes in reducednpFishload.files: -# print('loaded reduced classes') -# for fish in reducednpFishload[fishes]: -# fishtemp = np.zeros([4,len(fish[0])]) -# for i, fishfeature in enumerate(fish): -# fishtemp[i] = fishfeature -# reducednpFishes.append(fishtemp) -# -# for i, rfish in enumerate(reducednpFishes): -# if not os.path.exists('classes/'): -# os.makedirs('classes/') -# np.save('classes/'+ filename[prefixlen:-4]+"_class%d_reduced"%i, rfish) - #print('reducednpFishes to check structure', reducednpFishes[0][3]) - - - - window_freq = 1 - freqavgsecpath = filename[prefixlen:-4]+"_freqs2.npy" - if new == 1 or not os.path.exists(freqavgsecpath): - print('new freq calcing') - avg_freq = np.zeros([len(reducednpFishes),datalen//(samplerate*window_freq)+1]) - avg_isi = np.zeros([len(reducednpFishes),datalen//(samplerate*window_freq)+1]) - for i, fish in enumerate(reducednpFishes): - fish = np.array(fish) - avg_freqs_temp = [] - avg_isi_temp = [] - peak_ind = 0 - sec = 0 - for secx in np.arange(fish[0][0],fish[0][-1], samplerate*window_freq): - #count_peaks_in_second = ((secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)).sum() - # isimean_peaks_in_second = fish[3][(secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)].mean() # # # # # # # # # Using median instead of mean. Thus, hopefully overgoing outlier-isis, which are due to Peaks hidden beneath stronger Peaks of another fish. - #freq_in_bin = samplerate/isimean_peaks_in_second - sec_peaks = fish[3][(secx <= fish[0]) & (fish[0] < secx+samplerate*window_freq)] - #sec_freq = np.divide(samplerate,sec_peaks) - print(sec_peaks) - if len(sec_peaks) > 0: - #perctop, percbot = np.percentile(sec_peaks, [45, 55]) - #peakisi_in_bin = sec_peaks[(perctop>=sec_peaks)&(sec_peaks>=percbot)].mean() - #print(perctop, percbot, peaks_in_bin) - #isimean_peaks_in_bin = sec_peaks[(perctop >=sec_peaks)&(sec_peaks>=percbot)].mean() - isimean_peaks_in_bin = np.median(sec_peaks) - freq_in_bin = samplerate/isimean_peaks_in_bin - else: freq_in_bin = np.nan - ################################################################################################################################### TODO - #isimean_peaks_in_bin = np.median(fish[3][(secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)]) - print(freq_in_bin) - #freq_in_bin = count_peaks_in_second - if 5 < freq_in_bin < 140: - avg_freqs_temp.append(freq_in_bin) - else: - avg_freqs_temp.append(np.nan) - sec+=1 - #print(sec, freq_in_bin) - # avg_freqs_temp, noiseindice = noisedelete_smoothing(avg_freqs_temp, 3, 2, 100000, 1000) - #avg_freqs_temp, noiseindice = noisedelete_lowpass(avg_freqs_temp, binlen= 10) - avg_freq[i, fish[0][0]//(samplerate*window_freq) : fish[0][0]//(samplerate*window_freq)+sec] = np.array(avg_freqs_temp) - #plt.show() - - - - - - if save == 1: - np.save(freqavgsecpath, avg_freq) - else: - avg_freq = np.load(freqavgsecpath) - print('loaded freqs') - #for i in avg_isi_fish: - # print('avg_freqs_byisi') - # plt.plot(i) - #plt.xlabel('seconds') - #plt.ylabel('isi of peaks') - #plt.show() - # cmap = plt.get_cmap('jet') - # colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) - # np.random.seed(22) - # np.random.shuffle(colors) - # colors = [colors[cl] for cl in range(len(avg_freq_fish))] - # for i, col in zip(avg_freq_fish, colors): - # print('avg_freqs', 'len:' ,len(avg_freq_fish)) - # plt.plot(i, color = col) - # plt.xlabel('seconds') - # 
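# The loop above turns each class into one frequency value per one-second
# window: all ISIs whose peak falls into the window are collected, their
# median (robust against intervals inflated by missed peaks) is converted to
# a rate, and only values between 5 and 140 Hz are kept.  A condensed sketch:
import numpy as np

def binned_frequency(peak_x, peak_isi, samplerate, window=1.0,
                     fmin=5.0, fmax=140.0):
    """Median-ISI based frequency per time window; NaN where undefined."""
    peak_x = np.asarray(peak_x, dtype=float)
    peak_isi = np.asarray(peak_isi, dtype=float)
    step = samplerate * window
    starts = np.arange(peak_x[0], peak_x[-1], step)
    freqs = np.full(len(starts), np.nan)
    for i, start in enumerate(starts):
        isis = peak_isi[(peak_x >= start) & (peak_x < start + step)]
        if len(isis) > 0:
            f = samplerate / np.median(isis)
            if fmin < f < fmax:
                freqs[i] = f
    return freqs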
plt.ylabel('frequency of peaks') - # plt.show() - ## #print(avg_freqs[0]) - - - window_avg = 1 - ampavgsecpath = filename[prefixlen:-4]+'_amps2.npy' - #freqtime = np.arange(0, len(data), samplerate) - if new == 1 or not os.path.exists(ampavgsecpath): - avg_amps_temp = [] - peak_ind = 0 - - avg_amp = np.zeros([len(reducednpFishes),datalen//(samplerate*window_avg)+1]) - #avg_amp_fish = np.memmap(ampavgsecpath, dtype='float32', mode='w+', shape=(len(reducednpFishes),datalen//samplerate+1)) - - for i, fish in enumerate(reducednpFishes): - if len(fish[0]) >= 20: - #print('amp, ', i, '/', len(reducednpFishes)) - step = 0 - avg_amps_temp = [] - for secx in np.arange(fish[0][0],fish[0][-1], samplerate*window_avg): - amp_in_second = fish[2][(secx < fish[0]) & (fish[0] < secx+samplerate*window_avg)].mean() - # print(i, peak_ind, amp_in_second) - avg_amps_temp.append(amp_in_second) - step+=1 - #print('avg_amps_temp', avg_amps_temp) - #avg_amps = np.memmap(ampavgsecpath, dtype='float32', mode='w+', shape=(len(avg_amps_temp), )) - #avg_amps[:] = avg_amps_temp - - avg_amps_temp = np.array(avg_amps_temp) - avg_amps_temp[np.where(np.isnan(avg_amps_temp))] = 0.0 - avg_amp[i, fish[0][0]//(samplerate*window_avg) : fish[0][0]//(samplerate*window_avg)+step] = avg_amps_temp - - if save == 1: - np.save(ampavgsecpath, avg_amp) -# np.save(ampavgsecpath, avg_amp_fish) - # print('avg_amps ',avg_amps) - #avg_freqs.append(np.mean(eods_freq[i:i+samplerate])) - else: - #avg_amps = np.memmap(ampavgsecpath, dtype='float32', mode='r', shape=(data//samplerate)) - avg_amp = np.load(ampavgsecpath) - print('loaded amp') - - if new == 1 or plot == 1 : - # Plotting ####################################################################################################################### - ################################################################################################################################## - - import matplotlib.gridspec as gridspec - gs = gridspec.GridSpec(2, 2, height_ratios=(1, 1), width_ratios=(1, 0.02)) - - # Tuning colors - maxfreq = 140 - coloroffset = 5 - # Colorbar Choice - cmap = plt.get_cmap('magma')#'gist_rainbow') - cmap_amp = plt.get_cmap('Blues')#'gist_rainbow') - # Colorbar Workaround - Z = [[0,0],[0,0]] - min, max = (0, maxfreq) - step = 1 - levels = np.arange(min,max+step,step) - CS3 = plt.contourf(Z, levels, cmap=cmap) - plt.clf() - plt.close() - ##################### - # Colorbar Workaround - Z = [[0,0],[0,0]] - min, max = (0, 1) - step = 1/100 - levels = np.arange(min,max+step,step) - CSa = plt.contourf(Z, levels, cmap=cmap_amp) - plt.clf() - plt.close() - ##################### - # mapping colormap onto fixed array of frequencyrange - step = 1/maxfreq - collist = cmap(np.arange(0, 1+step, step)) - ampstep = 1/200 - collist_amp = cmap_amp(np.arange(0, 1+ampstep, ampstep)) - collist_amp = collist_amp[100:]#[::-1] - print(collist[0], collist[-1], collist[140]) - - plt.rcParams['figure.figsize'] = 20,4.45 - ampax = plt.subplot(gs[1,:-1]) - #freqax = ampax.twinx() - freqax = plt.subplot(gs[0,:-1], sharex=ampax) - barax = plt.subplot(gs[1,-1]) - ampbarax = plt.subplot(gs[0,-1]) - avg_freq[ avg_freq == 0 ] = np.nan - avg_amp[ avg_amp == 0 ] = np.nan - # colorlist = np.zeros([len(avg_freq)]) - # valuecount = 0 - - # remove amp where freq is np.nan - # might actually not belong in the plotting section.. 
- #for f, a in zip(avg_freq, avg_amp): - # a[np.isnan(f)] = np.nan - - for f, a in zip(avg_freq, avg_amp): - myred='#d62728' - myorange='#ff7f0e' - mygreen='#2ca02c' - mylightgreen="#bcbd22" - mygray="#7f7f7f" - myblue='#1f77b4' - mylightblue="#17becf" - newlightblue = "#e1f7fd" - # getting the right color for each scatterpoint - fc = f[~np.isnan(f)] - #collist = np.append(np.array([collist[0,:]]*30),(collist[30:]), axis = 0) - fc[fc > maxfreq] = maxfreq - #fc[fc < coloroffset] = 0 - #collist = np.append(np.array([collist[0,:]]*coloroffset),(collist[coloroffset:]), axis = 0) - #col = [collist[v-coloroffset] if c >= coloroffset else collist[0] for v in fc if coloroffset <= v <= maxfreq] - col = [collist[int(v)] for v in fc] - ampcol = [collist_amp[int(v*100/2)] for v in a[~np.isnan(a)]] - # plotting - l1 = ampax.scatter(np.arange(0, len(a)*window_avg, window_avg) ,a, s = 1,label = 'amplitude', color = col)#colors[col], ls = ':') - l2 = freqax.scatter(np.arange(0,len(f)*window_freq,window_freq),f, s = 1, label = 'frequency', color = ampcol)#colors[col]) - # ls = l1+l2 - #labels = [l.get_label() for l in ls] - # ampax.legend(ls, labels, loc=0) - ampax.set_xlabel('Time [s]') - ampax.set_ylabel('amplitude of peaks') - freqax.set_ylabel('frequency of peaks') - freqbar =plt.colorbar(CS3, cax = barax) - ampbar = plt.colorbar(CSa, cax = ampbarax ) - freqbar.set_ticks([0,20,40,60,80,100,120]) - ampbar.set_ticks([0,0.2,0.4,0.6,0.8,1.0,1.2,1.4,1.8]) - ampbar.set_clim(-1,1) - freqax.set_xlim(0,len(a)*window_avg) - freqax.set_ylim(0,maxfreq) - ampax.set_xlim(0, len(a)*window_avg) - ampax.set_ylim(0,2) - plt.setp(freqax.get_xticklabels(), visible=False) - # remove last tick label for the second subplot - yticks = ampax.yaxis.get_major_ticks() - yticks[-1].label1.set_visible(False) - plt.subplots_adjust(hspace=.0) - print('plot', plot) - if plot == 1: - print('show plot') - plt.show() - if save == 1: - plt.savefig(filename[prefixlen:-4]+'_AmpFreq5.pdf') - else: - print('already saved figure, if you want to see the result start with plot == 1') - - -def bin_percentilediff(data, binlen): - data = np.array(data) - return np.percentile(data[:(data.size // binlen) * binlen].reshape(-1, binlen),95, axis=1) - np.percentile(data[:(data.size // binlen) * binlen].reshape(-1, binlen), 5 , axis=1) - -def bin_mean(data, binlen): - return np.mean(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) - # window_bigavg = 300 - # big_bin = [] - # for i in np.arange(0,len(avg_freq[0]),window_bigavg): # print('iiii?', i) - # collector = [] - # for f, a, col in zip(avg_freq, avg_amp, colorlist): - # for data in f[i//window_freq:(i+window_bigavg)//window_freq]: - # if data != 0 and not np.isnan(data): - # collector.append(data) - # print(collector) - # if len(collector) >100: - # big_bin.append(collector) - # for part in big_bin: - # print('i') - # plt.hist(part, bins = 250, range = (0,250)) - # plt.show() - - -def bin_ratio_std_mean(array, binlen): - #print( bin_array_std(array, binlen)/bin_array_mean(array,binlen) ) - mean, std, d2, d8 = bin_array_mean(array,binlen) - #print('mean, std, d2, d8', mean, std, d2, d8) - return mean * 2 > d8 > mean > d2 > mean * 0.5 - - -def bin_array_std(array, binlen): - bins = len(array)//binlen - stds = np.zeros((bins+1)) - #print(array[0: binlen]) - for i in range(len(stds)): - stds[i] = np.std(array[i*binlen: (i+1)*binlen]) - #print('stds0', stds[0], len(array)) - return stds - - -def bin_array_mean(array, binlen): - bins = len(array)//binlen +1 if len(array) % binlen != 0 else 
len(array)//binlen - means = np.zeros((bins)) - #print(array[0: binlen]) - stds = np.zeros((bins)) - d2 = np.zeros((bins)) - d8 = np.zeros((bins)) - for i in range(bins): - stds[i] = np.std(array[i*binlen: (i+1)*binlen]) - means[i] = np.median(array[i*binlen: (i+1)*binlen]) - d2[i] = np.percentile(array[i*binlen: (i+1)*binlen], 20) - d8[i] = np.percentile(array[i*binlen: (i+1)*binlen], 80) - - # means[i] = np.mean(array[i*binlen: (i+1)*binlen]) - #print('mean0',means[0], len(array)) - return means, stds, d2, d8 - - - - -def bin_ndarray(ndarray, new_shape, operation='sum'): - """ - Bins an ndarray in all axes based on the target shape, by summing or - averaging. - - Number of output dimensions must match number of input dimensions and - new axes must divide old ones. - - Example - ------- - >>> m = np.arange(0,100,1).reshape((10,10)) - >>> n = bin_ndarray(m, new_shape=(5,5), operation='sum') - >>> print(n) - - [[ 22 30 38 46 54] - [102 110 118 126 134] - [182 190 198 206 214] - [262 270 278 286 294] - [342 350 358 366 374]] - - """ - operation = operation.lower() - if not operation in ['sum', 'mean', 'std']: - raise ValueError("Operation not supported.") - if ndarray.ndim != len(new_shape): - raise ValueError("Shape mismatch: {} -> {}".format(ndarray.shape, - new_shape)) - compression_pairs = [(d, c//d) for d,c in zip(new_shape, - ndarray.shape)] - - #print(len(new_shape)) - flattened = [l for p in compression_pairs for l in p] - - ndarray = ndarray.reshape(len(flattened)) - for i in range(len(new_shape)): - op = getattr(ndarray, operation) - ndarray = op(-1*(i+1)) - return ndarray - - - - - - -def fill_hidden_3(fishes, eods, filename): - fishes = fishes - #print('hidden_calcing...') - nohidefishes = [] - for cl, fish in enumerate(fishes): - #print('Step1: Fish ', cl, ' ', cl, ' / ', len(fishes)) - #f = np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ "X.npmmp", dtype='float32', mode='w+', shape=(3,len(fish[0])*2), order = 'F') - f = np.zeros([3, len(fish[0])*2]) - fishisi = np.diff(fish[0]) - isi = fishisi[0] - lst_offst =0 - for i, newisi in enumerate(fishisi): - # print(cl, ' ..currently peak ', i, ' / ' , len(fishisi)) - newi = i+lst_offst - if newi > len(f[0])-1: # Errör - # print('Oh shit, nparray to small. 
doubling size') - f_new = np.empty([3,len(f[0])*2]) - f_new[:,:len(f[0])]=f - f = f_new - f[0][newi]=fish[0][i] - f[1][newi]=fish[1][i] - f[2][newi]=fish[2][i] - -# print(i, newi) - - - # print(cl, fish[0][i], isi, newisi) - if newisi > 2.8*isi: - guessx = fish[0][i] + isi - while guessx < fish[0][i] + newisi-0.8*isi: - peakx = peakaround3(guessx, isi*0.1, eods) - if peakx is not None: - newi = i+lst_offst - f[0][newi+1]=peakx - f[1][newi+1]=fish[1][i] - f[2][newi+1]=fish[2][i] - #print('estimated hidden peak: ', f[0][newi+1], f[2][newi+1]) - guessx = peakx + isi + (peakx-guessx) - lst_offst +=1 - #print('offset+1 at' ,i , peakx) - continue - break - isi = newisi - - - - nohidefishes.append(np.array([f[0,0:newi+1],f[1,0:newi+1],f[2,0:newi+1]])) - - - #print(x[0], x[200]) - return nohidefishes - - -def fill_hidden_Not(fishes, eods, filename): - fishes = fishes - #print('hidden_calcing...') - nohidefishes = [] - #for cl, fish in enumerate(fishes): - #print('Step1: Fish ', cl, ' ', cl, ' / ', len(fishes)) - #f = np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ "X.npmmp", dtype='float32', mode='w+', shape=(3,len(fish[0])*2), order = 'F') - return nohidefishes - -def noisedelete_smoothing(array, binlen, method, thr1, thr2): - if len(array) <= 2: - if np.mean(array) > 140: - for a in array: - a = np.nan - return array, np.arange(0, len(array), 1) - temp_classisi = array - if len(array) > 11: - smoothed = savgol_filter(temp_classisi, 11, 1) - else: smoothed = savgol_filter(temp_classisi, 3, 1) - diff = np.square(smoothed-temp_classisi) - data = np.array(diff) - #plt.plot(diff, color = 'green') - result = np.median(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) - result2 = bin_percentilediff(temp_classisi, binlen) - if method == 1: - result = result - elif method == 2: - result = result2 - if len(result) > 7: - smoothedresult = savgol_filter(result, 7, 1) - else: - smoothedresult = result - #plt.plot(np.arange(0,len(result)*binlen, binlen),result) - #plt.plot(smoothed) - #plt.plot(np.arange(0,len(result2)*20, 20), smoothedresult2) - #plt.plot(np.arange(0,len(result2)*20, 20), result2) - # plt.plot(temp_classisi, color = 'black') - # plt.plot(np.arange(0, len(result)*binlen, binlen),smoothedresult, 'red') - if method ==1 : - noiseindice = np.where(smoothedresult > thr1) - elif method == 2: - noiseindice = np.where(result > thr2)[0] - elif method == 3: - noiseindice = np.where(data > 1000) - print(noiseindice) - noiseindice = np.multiply(noiseindice, binlen) - print(noiseindice) - noiseindice = [x for i in noiseindice for x in range(i, i+binlen)] - print(np.diff(noiseindice)) - noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) - #print(noiseindice) - noiseindice = [x for arr in noiseindice if len(arr) > 1 for x in arr] - noiseindice= np.array(noiseindice) - #print(noiseindice) - array = np.array(array) - # Noise delete applial - if np.median(array) > 150: - noiseindice = np.arange(0, len(array), 1) - if len(noiseindice) > 0: - array[noiseindice] = np.nan - return array, noiseindice - -def noisedelete_lowpass(array,binlen): - origarray = array - if len(array) <= 5: - if np.mean(array) > 140 or np.mean(array) < 15: - for a in array: - a = np.nan - return array, [] #np.arange(0, len(array), 1) - array = np.array(array) - from scipy.signal import butter, lfilter - indice = [] - alldata = np.empty_like(array) - if len(array[np.isnan(array)]) > 0: - arrays = np.split(array, 
np.where(np.abs(np.diff(np.isnan(array))) == 1)[0]+1) - indice = np.where(np.abs(np.diff(np.isnan(array))) == 1)[0]+1 - indice = np.append(np.array([0]),indice) - else: - arrays = [array] - indice = [0] - for array,index in zip(arrays, indice): - if len(array) <2 or len(array[np.isnan(array)]) > 0: - alldata[index:index + len(array)] = array[:] - continue - print(array, 'array') - fs = 100 - cutoff = 25 - binlen = binlen - data = np.array(array, dtype = 'float64') - overlap = len(data)%binlen - if overlap > 0: - data = np.append(data, np.array([data[-1]]*(binlen-overlap)), axis = 0) - dataext = np.empty([data.shape[0]+20]) - dataext[:10]= data[0] - dataext[-10:] = data[-1] - dataext[10:-10]=data - B, A = butter(1, cutoff/ (fs / 2), btype = 'low') - #lpf_array = np.empty_like(dataext) - lpf_array= lfilter(B, A, dataext, axis = 0) - lpf_array = lfilter(B, A, lpf_array[::-1])[::-1] - lpf_binned_array = lpf_array[:(data.size // binlen) * binlen].reshape(-1, binlen) - lpf_array = lpf_array[10:-10] - if overlap > 0: - lpf_array[-(binlen-overlap):] = np.nan - data[-(binlen-overlap):] = np.nan - binned_array = data[:(data.size // binlen) * binlen].reshape(-1, binlen) - lpf_binned_array = lpf_array[:(data.size // binlen) * binlen].reshape(-1, binlen) - filterdiffs = np.empty([binned_array.shape[0]]) - #a = signal.firwin(1, cutoff = 0.3, window = "hamming") - for i, (bin_content, bin_filtered) in enumerate(zip(binned_array, lpf_binned_array)): - if i == binned_array.shape[0] - 1: - bin_content = bin_content[:-(binlen-overlap)] - bin_filtered = bin_filtered[:-(binlen-overlap)] - filterdiffs[i] = np.mean(np.square(np.subtract(bin_filtered[~np.isnan(bin_filtered)], bin_content[~np.isnan(bin_content)]))) - # filterdiff = filterdiff / len(bin_content) - print(filterdiffs) - binned_array[filterdiffs > 1, :] = np.nan - if overlap > 0: - data = binned_array.flatten()[:-(binlen-overlap)] - else: - data = binned_array.flatten() - print(data, 'data') - alldata[index:index + len(data)] = data - # twin[np.isnan(data)] = np.nan - # plt.plot(alldata, color = 'red') - # plt.plot(np.add(origarray, 2), color = 'blue') - # plt.ylim(0, 150) - # plt.show() - return alldata, [] - - # noiseindice = np.multiply(noiseindice, binlen) - # print(noiseindice) - # noiseindice = [x for i in noiseindice for x in range(i, i+binlen)] - # print(np.diff(noiseindice)) - # noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) - - # #print(noiseindice) - # noiseindice = [x for arr in noiseindice if len(arr) > 1 for x in arr] - # noiseindice= np.array(noiseindice) - # #print(noiseindice) - # array = np.array(array) - # # Noise delete applial - # if np.median(array) > 150: - # noiseindice = np.arange(0, len(array), 1) - # if len(noiseindice) > 0: - # array[noiseindice] = np.nan - # return array, noiseindice - - -def peakaround3(guessx, interval, eods): - pksinintv = eods[0][ ((guessx-interval < eods[0]) & (eods[0] < guessx+interval))] - if len(pksinintv)>0: - return(pksinintv[0]) - elif len(pksinintv) >1: - pksinintv = pksinintv[np.argmin(abs(pksinintv - guessx))] - return(pksinintv) ## might be bad, not tested - # for px in fish[0]: - # distold = interval - # if px < guessx-interval: - # continue - # # print('in area', guessx-interval) - # if guessx-interval < px < guessx+interval: - # found = True - # dist = px-guessx - # if abs(dist) < abs(distold): - # distold = dist - # if px > guessx+interval: - # - # if found == True: - # print(guessx, dist) - # 
time.sleep(5) - # return guessx + dist - # - # else: - # - # break - return None - - - -def fill_holes(fishes): #returns peakx, peaky, peakheight # Fills holes that seem to be missed peaks in peakarray with fake (X/Y/height)-Peaks - retur = [] - lost = [] - - #print('fill_holes fishes', fishes) - - for cl, fish in enumerate(fishes): - #print('Step2: Fish', cl) - fishisi = np.diff(fish[0]) - mark = np.zeros_like(fishisi) - isi = 0 - #print('mark', mark) - # print('fishisi' , fishisi) - #find zigzag: - c=0 - c0= 0 - n=0 - for i, newisi in enumerate(fishisi): - # print(newisi, isi) - if abs(newisi - isi)>0.15*isi: ## ZigZag-Detection : actually peaks of two classes in one class - leads to overlapping frequencys which shows in a zigzag pattern - if (newisi > isi) != (fishisi[i-1] > isi): - c+=1 - # print(abs(newisi - isi), 'x = ', fish[i].x) - c0+=1 - elif c > 0: - n += 1 - if n == 6: - if c > 6: - # print ('zigzag x = ', fish['x'][i-6-c0], fish['x'][i-6]) - mark[i-6-c0:i-6]= -5 - c = 0 - c0=0 - n = 0 - - #if c > 0: - # print(i, c) - # if c == 6: - # print('zigzag!') - isi = newisi - isi = 0 - for i, newisi in enumerate(fishisi): ## fill holes of up to 3 Peaks # Changed to: Only up to 1 Peak because : Holes might be intended for communicational reasons - #print('mark: ' , mark) - if mark[i] == -5: continue - if i+2 >= len(fishisi): - continue - if (2.2*isi > newisi > 1.8*isi) and (1.5*isi>fishisi[i+1] > 0.5*isi) : - mark[i] = 1 - isi = newisi - # print('found 1!' , i) - elif (2.2*isi > newisi > 1.8*isi) and (2.2*isi> fishisi[i+1] > 1.8*isi) and (1.5*isi > fishisi[i+2] > 0.5*isi): - mark[i] = 1 - isi = isi - #elif 3.4*isi > newisi > 2.6*isi and 1.5*isi > fishisi[i+1] > 0.5*isi: - # mark[i] = 2 - - elif (0.6* isi > newisi > 0): - # print('-1 found', i ) - if mark[i] ==0 and mark[i+1] ==0 and mark[i-1]==0 : - # isi newisi - # continue - # print('was not already set') - if fishisi[i-2] > isi < fishisi[i+1]: - mark[i] = -1 - # print('-1') - elif isi > fishisi[i+1] < fishisi[i+2]: - mark[i+1] = -1 - # print('-1') - isi = newisi - x = [] - y = [] - h = [] - x_lost=[] - y_lost=[] - h_lost=[] - # print('filledmarks: ', mark) - for i, m in enumerate(mark): - if m == -1 : - # print('-1 at x = ', fish['x'][i]) - continue - if m == -5: - x_lost.append(fish[0][i]) - y_lost.append(fish[1][i]) - h_lost.append(fish[2][i]) - x.append(fish[0][i]) - y.append(fish[1][i]) - h.append(fish[2][i]) - continue - x.append(fish[0][i]) - y.append(fish[1][i]) - h.append(fish[2][i]) - if m == 1: - # print('hofly added peak at x = ' , fish['x'][i]) - x.append(fish[0][i] + fishisi[i-1]) - y.append( 0.5*(fish[1][i]+fish[1][i+1])) - h.append(0.5*(fish[2][i]+fish[2][i+1])) - elif m== 2: - x.append(fish[0][i] + fishisi[i]) - y.append( 0.5*(fish[1][i]+fish[1][i+1])) - h.append(0.5*(fish[2][i]+fish[2][i+2])) - x.append(fish[0][i] + 2*fishisi[i-1]) - y.append( 0.5*(fish[1][i]+fish[1][i+2])) - h.append(0.5*(fish[2][i]+fish[2][i+2])) - # print('added at x = ', fish[0][i] + fishisi[i]) - x = np.array(x) - y= np.array(y) - h = np.array(h) - x_lost = np.array(x_lost) - y_lost = np.array(y_lost) - h_lost = np.array(h_lost) - #print('retur', x, y, h) - retur.append([x,y,h]) - lost.append([x_lost,y_lost,h_lost]) - # filledpeaks =np.array(filledpeaks) - # print(filledpeaks.shape) - # filledpeaks. 
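# A minimal, self-contained sketch of the hole-filling idea used in
# fill_holes: when the interval to the next pulse is roughly twice the running
# inter-pulse interval, a single pulse was probably missed and a synthetic one
# is inserted halfway.  The function name and the 1.8-2.2 tolerances are
# illustrative assumptions, not the exact bookkeeping done above.
import numpy as np

def fill_single_gaps(times, low=1.8, high=2.2):
    times = np.asarray(times, dtype=float)
    isi = np.median(np.diff(times))              # robust inter-pulse interval
    filled = [times[0]]
    for t0, t1 in zip(times[:-1], times[1:]):
        if low * isi < t1 - t0 < high * isi:     # gap of ~2 ISIs -> one missed pulse
            filled.append(t0 + 0.5 * (t1 - t0))  # insert a pulse in the middle
        filled.append(t1)
    return np.array(filled)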
- - return retur, lost - - -# eods[-len(thisblock_eods[:,]):] = thisblock_eods -# eods = np.memmap("eods_"+filename[:-3]+"npy", dtype='float32', mode='r+', shape=(4,eods_len)) - #fp = np.memmap(filepath[:-len(filename)]+"eods_"+filename[:-3]+"npy", dtype='float32', mode='r+', shape=(4,len(thisblock_eods[:,]))) - #nix print( b.data_arrays) - # for cl in np.unique(cllist): - # currentfish_x = x[:][cllist == cl] - # currentfish_y = y[:][cllist == cl] - # currentfish_h d= x[:][cllist == cl] - #nix try: - #nix xpositions[cl] = b.create_data_array("f%d_eods" %cl, "spiketimes", data = currentfish_x) - #nix xpositions[cl].append_set_dimension() - #nix # thisfish_eods = b.create_multi_tag("f%d_eods_x"%cl, "eods.position", xpositions[cl]) - #nix # thisfish_eods.references.append(nixdata) - #nix except nix.pycore.exceptions.exceptions.DuplicateName: - #nix - #nix xpositions[cl].append(currentfish_x) - - - #thisfish_eods.create_feature(y, nix.LinkType.Indexed) - #b.create_multi_tag("f%d_eods_y"%cl, "eods.y", positions = y) - #b.create_multi_tag("f%d_eods_h"%cl, "eods.amplitude", positions = h) - #thisfish_eods.create_feature - - #nix file.close() - # Save Data - # Needed: - # Meta: Starttime, Startdate, Length - # x, y, h, cl, difftonextinclass -> freq ? , - - # Later: Find "Nofish" - # Find "Twofish" - # Find "BadData" - # Find "Freqpeak" - # ? Find "Amppeak" - # - - # bigblock = np.array(bigblock) - # x=xarray(bigblock) - # y=yarray(bigblock) - # cl=clarray(bigblock) - - - #nix file = nix.File.open(file_name, nix.FileMode.ReadWrite) - #nix b = file.blocks[0] - #nix nixdata = b.data_arrays[0] - #nix cldata = [] - #nix print(classes) - #nix print(b.data_arrays) - #nix for i in range(len(np.unique(classes))): - #nix cldata.append(b.data_arrays[i+1]) - - - # for cl in - - # for cl in - # x = thisfish_eods - - - #nix file.close() - - - -def reduce_classes(npFishes): - offtimeclasses = [] - for i, fish in enumerate(npFishes): - fish = np.array(fish) - #print(fish[0]) - # print('nüFishes before and after command') - # print('bef', npFishes[i][0][0]) - # print(fish[:,:][:,np.where(~np.isnan(fish[0]))].reshape(4,-1)) - npFishes[i] = fish[:,:][:,np.where(~np.isnan(fish[0]))][:,0] - # print('after', npFishes[i][0][0]) - if len(npFishes[i][0]) <= 100: - offtimeclasses.append(i) - #print('delete class ', i) - #print('Len offtime vs len Fishes', len(offtimeclasses), len(npFishes)) - for index in sorted(offtimeclasses, reverse=True): - del npFishes[index] - #print('npFishes to check features', npFishes[0][3]) - srt_beg = sort_beginning(npFishes) - # print(len(npFishes[0])) - # print(len(srt_beg)) - #srt_end = sort_ending(npFishes) - if len(srt_beg) >= 1: - reduced = [] - reduced.append(srt_beg[0]) - #for i, fish in enumerate(srt_beg): - #print(len(srt_beg)) - #print('reducing classes') - for i in range(1, len(srt_beg)): - #print('.', end = '') - cl = 0 - reducedlen_beg = len(reduced) - while cl < reducedlen_beg: - cond1 = reduced[cl][0][-1] < srt_beg[i][0][0] - cond2 = False - nxt=i+1 - while nxt < len(srt_beg) and srt_beg[i][0][-1] > srt_beg[nxt][0][0]: #part ends after another part started (possibly in the other part. 
- if len(srt_beg[nxt][0]) > len(srt_beg[i][0]):# -> lencheck to pick longer part) - reduced.append(srt_beg[i]) - # print('case1') - break - nxt+=1 - else: - cond2 = True - # print('lenreduced', len(reduced), len(srt_beg)) - #print(i, cl, cond1, cond2 ) - if cond1 and cond2: - #print(reduced[cl].shape, srt_beg[i].shape) - reduced[cl] = np.concatenate((reduced[cl],srt_beg[i]), axis=1) - #print(len(reduced[cl][0]), len(srt_beg[i][0])) - cl+=1 - break - if cond2 == False: - break - cl+=1 - else: - reduced.append(srt_beg[i]) - - #print('len red', len(reduced)) - #print(len(npFishes[0])) - return reduced - else: - return [] - -def sort_beginning(npFishes): - srted = npFishes - srted.sort(key=lambda x: x[0][0]) - #for i in srted[0][0]: - # print(i) - - return srted - -def sort_ending(npFishes): - srted = npFishes[:] - srted.sort(key=lambda x: x[0][-1]) - return srted - -def noisedscrd(fishes): - for fish in fishes: - print(np.std(fish[2])) - - -def plot_ontimes(ontime): - plt.fill_between(range(len(ontime[0])), ontime[0], color = '#1e2c3c', label = 'close') #'#324A64' - plt.fill_between(range(len(ontime[1])), ontime[1], color = '#324A64', label = 'nearby') - plt.fill_between(range(len(ontime[2])), ontime[2], color = '#8ea0b4', label = 'far') - plt.xlabel('seconds') - plt.ylabel('position') - plt.legend(loc = 1) - plt.ylim(0,1.5) - # plt.xlim(0,len()) - plt.show() - - - -if __name__ == '__main__': - main() diff --git a/thunderfish/DextersThunderfishAddition/analyzeEods_lowpass.py b/thunderfish/DextersThunderfishAddition/analyzeEods_lowpass.py deleted file mode 100644 index 41d8d039..00000000 --- a/thunderfish/DextersThunderfishAddition/analyzeEods_lowpass.py +++ /dev/null @@ -1,1130 +0,0 @@ -import sys -import numpy as np -import copy -from scipy.stats import gmean -from scipy import signal -from scipy import optimize -import matplotlib.pyplot as plt -import matplotlib.colors as mplcolors -from thunderfish.dataloader import open_data -from thunderfish.peakdetection import detect_peaks -from scipy.interpolate import interp1d -from scipy.signal import savgol_filter -from collections import deque -import nixio as nix -import time -import os -import pickle - - -deltat = 60.0 # seconds of buffer size -thresh = 0.05 -mind = 0.1 # minimum distance between peaks -peakwidththresh = 30 # maximum distance between max(peak) and min(trough) of a peak, in datapoints -new = 0 - -def main(): ############################################################# Get arguments eodsfilepath, plot, (opt)save, (opt)new - - filepath = sys.argv[1] - sys.argv = sys.argv[1:] - - - plot = 0 - save = 0 - print(sys.argv) - if len(sys.argv)==2: - plot = int(sys.argv[1]) - print(plot) - if len(sys.argv)==3: - plot = int(sys.argv[1]) - save = int(sys.argv[2]) - print('saving results: ', save) - import ntpath - if len(sys.argv)==4: - plot = int(sys.argv[1]) - save = int(sys.argv[2]) - new = int(sys.argv[3]) - print('saving results: ', save) - ntpath.basename("a/b/c") - def path_leaf(path): - head, tail = ntpath.split(path) - return tail or ntpath.basename(head) - filename = path_leaf(filepath) - prefixlen = filename.find('_')+1 - starttime = "2000" - home = os.path.expanduser('~') - path = filename[prefixlen:-4]+"/" - os.chdir(home+'/'+path) # operating in directory home/audiofilename/ - if os.path.exists(filename[prefixlen:-4]+"_freqs2_lp.npy"): - if os.path.getsize(filename[prefixlen:-4]+"_freqs2_lp.npy") > 0: - print('exists already') - quit() - else: - print('found a 0!') - - # if 
os.path.exists(filename[prefixlen:-4]+'_AmpFreq5_lp.pdf'): - # new = 0 - - with open_data(filename[prefixlen:-4]+".WAV", 0, 60, 0.0, 0.0) as data: - samplerate = data.samplerate - datalen = len(data) - - ############################################################# Fileimport and analyze; or skip, if analyzed data already exists - if new == 1 or not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes.npz"): - print('new analyse') - eods = np.load(filename, mmap_mode='c') - - # time1 = 40000 - # time2 = 45000 - # time1x = time1 * samplerate - # time2x = time2 * samplerate - # startpeak = np.where(((eods[0]>time1x)&(eods[0]= 100 and i % (classamount//100) == 0: - print(i) - fishclass = eods[:,:][: , classlist == num] - fish = [] - if len(fishclass[0]) < 12: - continue - for i , feature in enumerate(fishclass): - if i != 3: - fish.append(feature) -# print('fish - printing to check structure', fish) - temp_classisi = np.diff(fishclass[0]) - #print(temp_classisi) - #print('plot smooth vs orig', len(temp_classisi)) - binlen=10 - temp_classisi_medians = temp_classisi#bin_median(temp_classisi, 1) - smoothed = savgol_filter(temp_classisi_medians,11,1) - diff = np.square(smoothed-temp_classisi_medians) - data = np.array(diff) - result = np.median(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) - result2 = bin_percentilediff(temp_classisi, 20) - if len(result) > 7 and len(result2) > 7: - smoothedresult = savgol_filter(result, 7, 1) - smoothedresult2 = savgol_filter(result2, 7, 1) - else: - smoothedresult = result - smoothedresult2 = result2 - #plt.plot(np.arange(0,len(result)*binlen, binlen),result) - #plt.plot(smoothed) - #plt.plot(np.arange(0,len(result2)*20, 20), smoothedresult2) - #plt.plot(np.arange(0,len(result2)*20, 20), result2) - # plt.plot(temp_classisi_medians) - #plt.plot(np.arange(0, len(smoothedresult)*binlen, binlen),smoothedresult) - noiseindice = np.where(smoothedresult > 100000) - #print(noiseindice) - noiseindice = np.multiply(noiseindice, binlen) - #print(noiseindice) - noiseindice = [x for i in noiseindice[0] for x in range(i, i+10)] - print(np.diff(noiseindice)) - noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) - #print(noiseindice) - noiseindice = [x for arr in noiseindice if len(arr) > 20 for x in arr[50:-51]] - noiseindice= np.array(noiseindice) - #print(noiseindice) - fish = np.array(fish) - # Noise delete applial - # if len(noiseindice) >0 : - # fish[:,noiseindice] = np.nan #np.setdiff1d(np.arange(0, len(fish[0]),1),(noiseindice))] = np.nan - fish = list(fish) - #plt.plot(temp_classisi) - # plt.show() - binlen = 60 - #print(len(fish[0])) - if discardcondition1(fish) == False: # condition length < 10 - # if False: - mean, std, d2, d8 = bin_array_mean(temp_classisi,binlen) - # print('mean, std, d2, d8', mean, std, d2, d8) - count = ((mean * 4 >= d8) * (d2 >= mean * 0.25)) .sum() # condition_2 : if 0.2, and 0.8 deciles of the ISI of ONE SECOND/binlen are in the area of the median by a factor of 2, then the class seems to have not too much variability. - # Problem: Case, Frequency changes rapidly during one second/binlen , then the 0.8 or 0.2 will be out of the area... 
- # But then there is one wrong estimation, not too much of a problem - #print('fish') - # if count >= 0.5*(len(temp_classisi)//binlen +1): - if True: - fishes.append(fish) - #print('len fishes after append', len(fishes)) - #print('printing fishes to check structure', fishes[0][0]) - #ontimes = np.load('ontime'+filename[prefixlen:-4]+'.npz') - #ontime = [] - ## for c, items in enumerate(ontimes.items()): - ## ontime.append(items[1]) - #ontime.append(ontimes['on']) - #ontime.append(ontimes['near']) - #ontime.append(ontimes['far']) - - #if plot == 1: - # plot_ontimes(ontime) - - #print(eods[0][-1]//samplerate, len(ontime[0])) - if fishes is not None: - - #for fish in fishes: - # fish[0] - - # improving the fishpeak-data by adding peaks at places where theses peaks are hidden behind other (stronger)peaks - #fishes = fill_hidden_3(fishes, eods, filename) # cl-dict : x y z -dict - # filling holes or removing unexpected peaks from the class which are most likely caused by false classification - #fishes, weirdparts = fill_holes(fishes) - #fishes, weirdparts = fill_holes(fishes) - - if fishes is not None: - if len(fishes) > 0: - for cl, fish in enumerate(fishes): - ### Filter to only get ontimes close and nearby - for i, x in enumerate(fish[0]): - print(x) - #if x//samplerate < len(ontime[0]): -# # print(ontime[1][x//samplerate], ontime[0][x//samplerate]) - # if ontime[0][x//samplerate] != 1 and ontime[1][x//samplerate] != 1 and ontime[2][x//samplerate] != 1: - # for feat_i, feature in enumerate(fish): - # fishes[cl][feat_i][i] = np.nan - # print(x//samplerate, ' ignored') - isi = [isi for isi in np.diff(fishes[cl][0])] - isi.append(isi[-1]) - fishes[cl].append(isi) - #fishes[i] # the structure of the array fishes - # 0 x - # 1 y - # 2 h - # 3 isi - npFishes = fishes - - - # fishfeaturecount = len(fishes[cl]) - # for cl in range(len(np.unique(classlist))-1): - # - # fishlen = len(fishes[cl][0]) - # npFishes[cl]= np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ ".npmmp", dtype='float32', mode='w+', shape=(fishfeaturecount, fishlen), order = 'F') - # np.zeros([fishfeaturecount, len(fishes[cl]['x'])]) - # for i, feature in enumerate(['x', 'y', 'h', 'isi']): #enumerate(fishes[cl]): - # if feature == 'isi': - # fishes[cl][feature].append(fishes[cl][feature][-1]) - # npFishes[cl][i] = np.array(fishes[cl][feature]) - # - -# np.set_printoptions(threshold=np.nan) - # - if save == 1 and not os.path.exists('classes/'): - os.makedirs('classes/') - - #np.save('classes/'+ filename[prefixlen:-4]+"_class%d"%i, fish) - #print('this', len(npFishes)) - if save == 1: - with open('classes/'+ filename[prefixlen:-4]+"_classes.lst", "wb") as fp: #Pickling - pickle.dump(npFishes, fp) - #np.savez('classes/'+ filename[prefixlen:-4]+"_classes", npFishes) - else: - npFishes = [] - try: - with open('classes/'+ filename[prefixlen:-4]+"_classes.lst", "rb") as fp: #Pickling - npFishes = pickle.load(fp) - # npFishload=np.load('classes/'+ filename[prefixlen:-4]+"_classes.npz") - print('loaded classes') - except: - print('no classes found') - # for fishes in npFishload.files: - # print('loaded ', fishes) - # for fish in npFishload[fishes]: - # fishtemp = np.zeros([4,len(fish[0])]) - # for i, fishfeature in enumerate(fish): - # fishtemp[i] = fishfeature - # npFishes.append(fishtemp) - #print('npFishes to check structure', npFishes[0][0][0]) -# if not os.path.exists('classes/'): -# os.makedirs('classes/') -# if not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes_red"): -#np.save('classes/'+ 
filename[prefixlen:-4]+"_class%d"%i, fish) - if new == 1 or not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes_red.lst"): -# reducednpFishes = npFishes - reducednpFishes = reduce_classes(npFishes)# reducing classes by putting not overlapping classes together - #print('reduced') - if save == 1: - with open('classes/'+ filename[prefixlen:-4]+"_classes_red.lst", "wb") as fp: #Pickling - pickle.dump(reducednpFishes, fp) - #np.savez('classes/'+ filename[prefixlen:-4]+"_classes_red.npz", reducednpFishes) - else: - with open('classes/'+ filename[prefixlen:-4]+"_classes_red.lst", "rb") as fp: #Pickling - reducednpFishes = pickle.load(fp) - #print('len reduced ', len(reducednpFishes)) - if len(reducednpFishes) == 0: - print('no on-/ or nearbytimeclass with sufficient length or good enough data. quitting') - quit() -# reducednpFishload=np.load('classes/'+ filename[prefixlen:-4]+"_classes_red.npz") -# -# for fishes in reducednpFishload.files: -# print('loaded reduced classes') -# for fish in reducednpFishload[fishes]: -# fishtemp = np.zeros([4,len(fish[0])]) -# for i, fishfeature in enumerate(fish): -# fishtemp[i] = fishfeature -# reducednpFishes.append(fishtemp) -# -# for i, rfish in enumerate(reducednpFishes): -# if not os.path.exists('classes/'): -# os.makedirs('classes/') -# np.save('classes/'+ filename[prefixlen:-4]+"_class%d_reduced"%i, rfish) - #print('reducednpFishes to check structure', reducednpFishes[0][3]) - - - - window_freq = 1 - freqavgsecpath = filename[prefixlen:-4]+"_freqs2_lp.npy" - if new == 1 or not os.path.exists(freqavgsecpath): - print('new freq calcing') - avg_freq = np.zeros([len(reducednpFishes),datalen//int((samplerate*window_freq))+1]) - avg_isi = np.zeros([len(reducednpFishes),datalen//int(samplerate*window_freq)+1]) - for i, fish in enumerate(reducednpFishes): - if len(fish[0]) >= 5: - fish = np.array(fish) - avg_freqs_temp = [] - avg_isi_temp = [] - peak_ind = 0 - sec = 0 - for secx in np.arange(fish[0][0],fish[0][-1], samplerate*window_freq): - #count_peaks_in_second = ((secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)).sum() - # isimean_peaks_in_second = fish[3][(secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)].mean() # # # # # # # # # Using median instead of mean. Thus, hopefully overgoing outlier-isis, which are due to Peaks hidden beneath stronger Peaks of another fish. 
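# A minimal sketch of the per-window frequency estimate computed below: the
# median inter-pulse interval (in samples) within each window is converted to
# a frequency and implausible values are dropped.  The 5-140 Hz bounds follow
# the surrounding code; the function name is an illustrative assumption.
import numpy as np

def window_frequencies(peak_x, peak_isi, samplerate, window=1.0, fmin=5.0, fmax=140.0):
    freqs = []
    for start in np.arange(peak_x[0], peak_x[-1], samplerate * window):
        sel = (start < peak_x) & (peak_x < start + samplerate * window)
        f = samplerate / np.median(peak_isi[sel]) if np.any(sel) else np.nan
        freqs.append(f if fmin < f < fmax else np.nan)
    return np.array(freqs)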
- #freq_in_bin = samplerate/isimean_peaks_in_second - sec_peaks = fish[3][(secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)] - #sec_freq = np.divide(samplerate,sec_peaks) - print(sec_peaks) - if len(sec_peaks) > 0: - perctop, percbot = np.percentile(sec_peaks, [45, 55]) - #peakisi_in_bin = sec_peaks[(perctop>=sec_peaks)&(sec_peaks>=percbot)].mean() - #print(perctop, percbot, peaks_in_bin) - #isimean_peaks_in_bin = sec_peaks[(perctop >=sec_peaks)&(sec_peaks>=percbot)].mean() - isimean_peaks_in_bin = np.median(sec_peaks) - freq_in_bin = samplerate/isimean_peaks_in_bin - else: freq_in_bin = np.nan - ################################################################################################################################### TODO - #isimean_peaks_in_bin = np.median(fish[3][(secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)]) - print(freq_in_bin) - #freq_in_bin = count_peaks_in_second - if 5 < freq_in_bin < 140: - avg_freqs_temp.append(freq_in_bin) - else: - avg_freqs_temp.append(np.nan) - sec+=1 - #print(sec, freq_in_bin) - # avg_freqs_temp, noiseindice = noisedelete_smoothing(avg_freqs_temp, 3, 2, 100000, 1000) - avg_freqs_temp, noiseindice = noisedelete_lowpass(avg_freqs_temp, binlen= 10) - avg_freq[i, int(fish[0][0])//int(samplerate*window_freq) : int(fish[0][0])//int(samplerate*window_freq)+sec] = np.array(avg_freqs_temp) - #plt.show() - - if save == 1: - np.save(freqavgsecpath, avg_freq) - else: - avg_freq = np.load(freqavgsecpath) - print('loaded freqs') - #for i in avg_isi_fish: - # print('avg_freqs_byisi') - # plt.plot(i) - #plt.xlabel('seconds') - #plt.ylabel('isi of peaks') - #plt.show() - # cmap = plt.get_cmap('jet') - # colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) - # np.random.seed(22) - # np.random.shuffle(colors) - # colors = [colors[cl] for cl in range(len(avg_freq_fish))] - # for i, col in zip(avg_freq_fish, colors): - # print('avg_freqs', 'len:' ,len(avg_freq_fish)) - # plt.plot(i, color = col) - # plt.xlabel('seconds') - # plt.ylabel('frequency of peaks') - # plt.show() - ## #print(avg_freqs[0]) - - - window_avg = 1 - ampavgsecpath = filename[prefixlen:-4]+'_amps2_lp.npy' - #freqtime = np.arange(0, len(data), samplerate) - if new == 1 or not os.path.exists(ampavgsecpath): - avg_amps_temp = [] - peak_ind = 0 - - avg_amp = np.zeros([len(reducednpFishes),int(datalen)//int(samplerate*window_avg)+1]) - #avg_amp_fish = np.memmap(ampavgsecpath, dtype='float32', mode='w+', shape=(len(reducednpFishes),datalen//samplerate+1)) - - for i, fish in enumerate(reducednpFishes): - if len(fish[0]) >= 100: - #print('amp, ', i, '/', len(reducednpFishes)) - step = 0 - avg_amps_temp = [] - for secx in np.arange(fish[0][0],fish[0][-1], samplerate*window_avg): - amp_in_second = fish[2][(secx < fish[0]) & (fish[0] < secx+samplerate*window_avg)].mean() - # print(i, peak_ind, amp_in_second) - avg_amps_temp.append(amp_in_second) - step+=1 - #print('avg_amps_temp', avg_amps_temp) - #avg_amps = np.memmap(ampavgsecpath, dtype='float32', mode='w+', shape=(len(avg_amps_temp), )) - #avg_amps[:] = avg_amps_temp - - avg_amps_temp = np.array(avg_amps_temp) - avg_amps_temp[np.where(np.isnan(avg_amps_temp))] = 0.0 - avg_amp[i, int(fish[0][0])//int(samplerate*window_avg) : int(fish[0][0])//int(samplerate*window_avg)+step] = avg_amps_temp - - if save == 1: - np.save(ampavgsecpath, avg_amp) -# np.save(ampavgsecpath, avg_amp_fish) - # print('avg_amps ',avg_amps) - #avg_freqs.append(np.mean(eods_freq[i:i+samplerate])) - else: - #avg_amps = np.memmap(ampavgsecpath, 
dtype='float32', mode='r', shape=(data//samplerate)) - avg_amp = np.load(ampavgsecpath) - print('loaded amp') - - if new == 1 or plot == 1 or save == 1: - # Plotting ####################################################################################################################### - ################################################################################################################################## - - plt.style.use('../PaperWholeFig.mplstyle') - import matplotlib.gridspec as gridspec - gs = gridspec.GridSpec(2, 2, height_ratios=(1, 1), width_ratios=(1, 0.02), wspace = 0.03) - - # Tuning colors - maxfreq = 140 - coloroffset = 5 - # Colorbar Choice - cmap = plt.get_cmap('magma')#'gist_rainbow') - cmap_amp = plt.get_cmap('Blues')#'gist_rainbow') - # Colorbar Workaround - Z = [[0,0],[0,0]] - min, max = (0, maxfreq) - step = 1 - levels = np.arange(min,max+step,step) - CS3 = plt.contourf(Z, levels, cmap=cmap) - plt.clf() - plt.close() - ##################### - # Colorbar Workaround - Z = [[0,0],[0,0]] - min, max = (0, 1) - step = 1/100 - levels = np.arange(min,max+step,step) - CSa = plt.contourf(Z, levels, cmap=cmap_amp) - plt.clf() - plt.close() - ##################### - # mapping colormap onto fixed array of frequencyrange - step = 1/maxfreq - collist = cmap(np.arange(0, 1+step, step)) - ampstep = 1/200 - collist_amp = cmap_amp(np.arange(0, 1+ampstep, ampstep)) - collist_amp = collist_amp[100:]#[::-1] - print(collist[0], collist[-1], collist[140]) - - # plt.rcParams['figure.figsize'] = 20,4.45 - - ampax = plt.subplot(gs[1,:-1]) - #freqax = ampax.twinx() - freqax = plt.subplot(gs[0,:-1], sharex=ampax) - barax = plt.subplot(gs[1,-1]) - ampbarax = plt.subplot(gs[0,-1]) - avg_freq[ avg_freq == 0 ] = np.nan - avg_amp[ avg_amp == 0 ] = np.nan - # colorlist = np.zeros([len(avg_freq)]) - # valuecount = 0 - - # remove amp where freq is np.nan - # might actually not belong in the plotting section.. 
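# The dummy-contourf colorbars set up above can also be obtained directly from
# a ScalarMappable; this is a sketch of that alternative with placeholder data
# and axes, not the code used for the figures here.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
from matplotlib.colors import Normalize

fig, (ax, cax) = plt.subplots(1, 2, gridspec_kw={'width_ratios': (1, 0.02)})
cmap = plt.get_cmap('magma')
norm = Normalize(vmin=0, vmax=140)                     # frequency range in Hz
freqs = np.random.uniform(20, 120, 100)                # placeholder frequencies
ax.scatter(np.arange(len(freqs)), freqs, s=1, color=cmap(norm(freqs)))
sm = ScalarMappable(norm=norm, cmap=cmap)
sm.set_array([])                                       # required by older matplotlib
fig.colorbar(sm, cax=cax).set_label('Frequency [Hz]', rotation=270)
plt.show()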
- for f, a in zip(avg_freq, avg_amp): - a[np.isnan(f)] = np.nan - - for f, a in zip(avg_freq, avg_amp): - myred='#d62728' - myorange='#ff7f0e' - mygreen='#2ca02c' - mylightgreen="#bcbd22" - mygray="#7f7f7f" - myblue='#1f77b4' - mylightblue="#17becf" - newlightblue = "#e1f7fd" - # getting the right color for each scatterpoint - fc = f[~np.isnan(f)] - #collist = np.append(np.array([collist[0,:]]*30),(collist[30:]), axis = 0) - fc[fc > maxfreq] = maxfreq - fc[fc < coloroffset] = np.nan - #collist = np.append(np.array([collist[0,:]]*coloroffset),(collist[coloroffset:]), axis = 0) - #col = [collist[v-coloroffset] if c >= coloroffset else collist[0] for v in fc if coloroffset <= v <= maxfreq] - for v in fc: - print(v) - col = [collist[int(v)] for v in fc[~np.isnan(fc)]] - ampcol = [collist_amp[int(v*100/2)] for v in a[~np.isnan(a)]] - # plotting - l1 = ampax.scatter(np.arange(0, len(a)*window_avg, window_avg) ,a, s = 1,label = 'amplitude', color = col)#colors[col], ls = ':') - l2 = freqax.scatter(np.arange(0,len(f)*window_freq,window_freq),f, s = 1, label = 'frequency', color = ampcol)#colors[col]) - # ls = l1+l2 - #labels = [l.get_label() for l in ls] - # ampax.legend(ls, labels, loc=0) - ampax.set_xlabel('Time [s]') - ampax.set_ylabel('Amplitude') - freqax.set_ylabel('Frequency') - freqbar = plt.colorbar(CS3, cax = barax) - ampbar = plt.colorbar(CSa, cax = ampbarax) - ampbar.set_clim(-0.3,1) - freqbar.set_ticks([0,30,60,90,120]) - ampbar.set_ticks(np.arange(0,1,0.249)) - ampbar.set_ticklabels([0,0.5,1,1.5,2]) - freqax.set_xlim(0,len(a)*window_avg) - freqax.set_ylim(0,maxfreq) - ampax.set_ylim(0, 2) - ampax.set_xlim(0, len(a)*window_avg) - plt.setp(freqax.get_xticklabels(), visible=False) - # remove last tick label for the second subplot - yticks = ampax.yaxis.get_major_ticks() - yticks[-1].label1.set_visible(False) - plt.subplots_adjust(hspace=.0) - freqax.get_yaxis().set_label_coords(-0.05, 0.5) - ampax.get_yaxis().set_label_coords(-0.05, 0.5) - plt.gcf().subplots_adjust(bottom=0.15) - ampbar.ax.get_yaxis().set_label_coords(4,0.5) - ampbar.ax.set_ylabel('Amplitude', rotation=270) - freqbar.ax.get_yaxis().set_label_coords(4,0.5) - freqbar.ax.set_ylabel('Frequency', rotation=270) - - - # freqax.set_xlim(20600,21800) - # ampax.set_xlim(20600, 21800) - # freqax.set_ylim(80,125) - - print('plot', plot) - if plot == 1: - print('show plot') - plt.show() - if save == 1: - plt.savefig(filename[prefixlen:-4]+'_AmpFreq7_lp.pdf') - print('saved') - plt.clf() - else: - print('already saved figure, if you want to see the result start with plot == 1') - - -def bin_percentilediff(data, binlen): - data = np.array(data) - return np.percentile(data[:(data.size // binlen) * binlen].reshape(-1, binlen),95, axis=1) - np.percentile(data[:(data.size // binlen) * binlen].reshape(-1, binlen), 5 , axis=1) - -def bin_median(data, binlen): - return np.median(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) -def bin_mean(data, binlen): - return np.mean(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) - # window_bigavg = 300 - # big_bin = [] - # for i in np.arange(0,len(avg_freq[0]),window_bigavg): # print('iiii?', i) - # collector = [] - # for f, a, col in zip(avg_freq, avg_amp, colorlist): - # for data in f[i//window_freq:(i+window_bigavg)//window_freq]: - # if data != 0 and not np.isnan(data): - # collector.append(data) - # print(collector) - # if len(collector) >100: - # big_bin.append(collector) - # for part in big_bin: - # print('i') - # plt.hist(part, bins = 250, range = (0,250)) - 
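# A compact sketch of the reshape-based binning used by bin_percentilediff,
# bin_median and bin_mean above: trailing samples that do not fill a complete
# bin are discarded before the statistic is taken per bin.  The helper name is
# an illustrative assumption.
import numpy as np

def bin_apply(data, binlen, func=np.median):
    data = np.asarray(data, dtype=float)
    n = (data.size // binlen) * binlen            # keep complete bins only
    return func(data[:n].reshape(-1, binlen), axis=1)

# bin_apply(isi, 10) gives per-bin median ISIs; a 5-95 percentile spread as in
# bin_percentilediff follows from
# bin_apply(isi, 20, lambda d, axis: np.percentile(d, 95, axis=axis)
#                                    - np.percentile(d, 5, axis=axis))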
# plt.show() - - -def bin_ratio_std_mean(array, binlen): - #print( bin_array_std(array, binlen)/bin_array_mean(array,binlen) ) - mean, std, d2, d8 = bin_array_mean(array,binlen) - #print('mean, std, d2, d8', mean, std, d2, d8) - return mean * 2 > d8 > mean > d2 > mean * 0.5 - - -def bin_array_std(array, binlen): - bins = len(array)//binlen - stds = np.zeros((bins+1)) - #print(array[0: binlen]) - for i in range(len(stds)): - stds[i] = np.std(array[i*binlen: (i+1)*binlen]) - #print('stds0', stds[0], len(array)) - return stds - - -def bin_array_mean(array, binlen): - bins = len(array)//binlen +1 if len(array) % binlen != 0 else len(array)//binlen - means = np.zeros((bins)) - #print(array[0: binlen]) - stds = np.zeros((bins)) - d2 = np.zeros((bins)) - d8 = np.zeros((bins)) - for i in range(bins): - stds[i] = np.std(array[i*binlen: (i+1)*binlen]) - means[i] = np.median(array[i*binlen: (i+1)*binlen]) - d2[i] = np.percentile(array[i*binlen: (i+1)*binlen], 20) - d8[i] = np.percentile(array[i*binlen: (i+1)*binlen], 80) - - # means[i] = np.mean(array[i*binlen: (i+1)*binlen]) - #print('mean0',means[0], len(array)) - return means, stds, d2, d8 - - - - -def bin_ndarray(ndarray, new_shape, operation='sum'): - """ - Bins an ndarray in all axes based on the target shape, by summing or - averaging. - - Number of output dimensions must match number of input dimensions and - new axes must divide old ones. - - Example - ------- - >>> m = np.arange(0,100,1).reshape((10,10)) - >>> n = bin_ndarray(m, new_shape=(5,5), operation='sum') - >>> print(n) - - [[ 22 30 38 46 54] - [102 110 118 126 134] - [182 190 198 206 214] - [262 270 278 286 294] - [342 350 358 366 374]] - - """ - operation = operation.lower() - if not operation in ['sum', 'mean', 'std']: - raise ValueError("Operation not supported.") - if ndarray.ndim != len(new_shape): - raise ValueError("Shape mismatch: {} -> {}".format(ndarray.shape, - new_shape)) - compression_pairs = [(d, c//d) for d,c in zip(new_shape, - ndarray.shape)] - - #print(len(new_shape)) - flattened = [l for p in compression_pairs for l in p] - - ndarray = ndarray.reshape(len(flattened)) - for i in range(len(new_shape)): - op = getattr(ndarray, operation) - ndarray = op(-1*(i+1)) - return ndarray - - - - - - -def fill_hidden_3(fishes, eods, filename): - fishes = fishes - #print('hidden_calcing...') - nohidefishes = [] - for cl, fish in enumerate(fishes): - #print('Step1: Fish ', cl, ' ', cl, ' / ', len(fishes)) - #f = np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ "X.npmmp", dtype='float32', mode='w+', shape=(3,len(fish[0])*2), order = 'F') - f = np.zeros([3, len(fish[0])*2]) - fishisi = np.diff(fish[0]) - isi = fishisi[0] - lst_offst =0 - for i, newisi in enumerate(fishisi): - # print(cl, ' ..currently peak ', i, ' / ' , len(fishisi)) - newi = i+lst_offst - if newi > len(f[0])-1: # Errör - # print('Oh shit, nparray to small. 
doubling size') - f_new = np.empty([3,len(f[0])*2]) - f_new[:,:len(f[0])]=f - f = f_new - f[0][newi]=fish[0][i] - f[1][newi]=fish[1][i] - f[2][newi]=fish[2][i] - -# print(i, newi) - - - # print(cl, fish[0][i], isi, newisi) - if newisi > 2.8*isi: - guessx = fish[0][i] + isi - while guessx < fish[0][i] + newisi-0.8*isi: - peakx = peakaround3(guessx, isi*0.1, eods) - if peakx is not None: - newi = i+lst_offst - f[0][newi+1]=peakx - f[1][newi+1]=fish[1][i] - f[2][newi+1]=fish[2][i] - #print('estimated hidden peak: ', f[0][newi+1], f[2][newi+1]) - guessx = peakx + isi + (peakx-guessx) - lst_offst +=1 - #print('offset+1 at' ,i , peakx) - continue - break - isi = newisi - - - - nohidefishes.append(np.array([f[0,0:newi+1],f[1,0:newi+1],f[2,0:newi+1]])) - - - #print(x[0], x[200]) - return nohidefishes - - -def fill_hidden_Not(fishes, eods, filename): - fishes = fishes - #print('hidden_calcing...') - nohidefishes = [] - #for cl, fish in enumerate(fishes): - #print('Step1: Fish ', cl, ' ', cl, ' / ', len(fishes)) - #f = np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ "X.npmmp", dtype='float32', mode='w+', shape=(3,len(fish[0])*2), order = 'F') - return nohidefishes - -def noisedelete_smoothing(array, binlen, method, thr1, thr2): - if len(array) <= 2: - if np.mean(array) > 140: - for a in array: - a = np.nan - return array, np.arange(0, len(array), 1) - temp_classisi = array - if len(array) > 11: - smoothed = savgol_filter(temp_classisi, 11, 1) - else: smoothed = savgol_filter(temp_classisi, 3, 1) - diff = np.square(smoothed-temp_classisi) - data = np.array(diff) - #plt.plot(diff, color = 'green') - result = np.median(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) - result2 = bin_percentilediff(temp_classisi, binlen) - if method == 1: - result = result - elif method == 2: - result = result2 - if len(result) > 7: - smoothedresult = savgol_filter(result, 7, 1) - else: - smoothedresult = result - #plt.plot(np.arange(0,len(result)*binlen, binlen),result) - #plt.plot(smoothed) - #plt.plot(np.arange(0,len(result2)*20, 20), smoothedresult2) - #plt.plot(np.arange(0,len(result2)*20, 20), result2) - # plt.plot(temp_classisi, color = 'black') - # plt.plot(np.arange(0, len(result)*binlen, binlen),smoothedresult, 'red') - if method ==1 : - noiseindice = np.where(smoothedresult > thr1) - elif method == 2: - noiseindice = np.where(result > thr2)[0] - elif method == 3: - noiseindice = np.where(data > 1000) - print(noiseindice) - noiseindice = np.multiply(noiseindice, binlen) - print(noiseindice) - noiseindice = [x for i in noiseindice for x in range(i, i+binlen)] - print(np.diff(noiseindice)) - noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) - #print(noiseindice) - noiseindice = [x for arr in noiseindice if len(arr) > 1 for x in arr] - noiseindice= np.array(noiseindice) - #print(noiseindice) - array = np.array(array) - # Noise delete applial - if np.median(array) > 150: - noiseindice = np.arange(0, len(array), 1) - if len(noiseindice) > 0: - array[noiseindice] = np.nan - return array, noiseindice - -def noisedelete_lowpass(array,binlen): - origarray = array - if len(array) <= 5: - if np.mean(array) > 140 or np.mean(array) < 15: - for a in array: - a = np.nan - return array, [] #np.arange(0, len(array), 1) - array = np.array(array) - from scipy.signal import butter, lfilter - indice = [] - alldata = np.empty_like(array) - if len(array[np.isnan(array)]) > 0: - arrays = np.split(array, 
np.where(np.abs(np.diff(np.isnan(array))) == 1)[0]+1) - indice = np.where(np.abs(np.diff(np.isnan(array))) == 1)[0]+1 - indice = np.append(np.array([0]),indice) - else: - arrays = [array] - indice = [0] - for array,index in zip(arrays, indice): - if len(array) <2 or len(array[np.isnan(array)]) > 0: - alldata[index:index + len(array)] = array[:] - continue - print(array, 'array') - fs = 100 - cutoff = 25 - binlen = binlen - data = np.array(array, dtype = 'float64') - overlap = len(data)%binlen - if overlap > 0: - data = np.append(data, np.array([data[-1]]*(binlen-overlap)), axis = 0) - dataext = np.empty([data.shape[0]+20]) - dataext[:10]= data[0] - dataext[-10:] = data[-1] - dataext[10:-10]=data - B, A = butter(1, cutoff/ (fs / 2), btype = 'low') - #lpf_array = np.empty_like(dataext) - lpf_array= lfilter(B, A, dataext, axis = 0) - lpf_array = lfilter(B, A, lpf_array[::-1])[::-1] - lpf_binned_array = lpf_array[:(data.size // binlen) * binlen].reshape(-1, binlen) - lpf_array = lpf_array[10:-10] - if overlap > 0: - lpf_array[-(binlen-overlap):] = np.nan - data[-(binlen-overlap):] = np.nan - binned_array = data[:(data.size // binlen) * binlen].reshape(-1, binlen) - lpf_binned_array = lpf_array[:(data.size // binlen) * binlen].reshape(-1, binlen) - filterdiffs = np.empty([binned_array.shape[0]]) - #a = signal.firwin(1, cutoff = 0.3, window = "hamming") - for i, (bin_content, bin_filtered) in enumerate(zip(binned_array, lpf_binned_array)): - if i == binned_array.shape[0] - 1: - bin_content = bin_content[:-(binlen-overlap)] - bin_filtered = bin_filtered[:-(binlen-overlap)] - filterdiffs[i] = np.mean(np.square(np.subtract(bin_filtered[~np.isnan(bin_filtered)], bin_content[~np.isnan(bin_content)]))) - # filterdiff = filterdiff / len(bin_content) - print(filterdiffs) - binned_array[filterdiffs > 1, :] = np.nan - if overlap > 0: - data = binned_array.flatten()[:-(binlen-overlap)] - else: - data = binned_array.flatten() - print(data, 'data') - alldata[index:index + len(data)] = data - # twin[np.isnan(data)] = np.nan - # plt.plot(alldata, color = 'red') - # plt.plot(np.add(origarray, 2), color = 'blue') - # plt.ylim(0, 150) - # plt.show() - return alldata, [] - - # noiseindice = np.multiply(noiseindice, binlen) - # print(noiseindice) - # noiseindice = [x for i in noiseindice for x in range(i, i+binlen)] - # print(np.diff(noiseindice)) - # noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) - - # #print(noiseindice) - # noiseindice = [x for arr in noiseindice if len(arr) > 1 for x in arr] - # noiseindice= np.array(noiseindice) - # #print(noiseindice) - # array = np.array(array) - # # Noise delete applial - # if np.median(array) > 150: - # noiseindice = np.arange(0, len(array), 1) - # if len(noiseindice) > 0: - # array[noiseindice] = np.nan - # return array, noiseindice - - -def peakaround3(guessx, interval, eods): - pksinintv = eods[0][ ((guessx-interval < eods[0]) & (eods[0] < guessx+interval))] - if len(pksinintv)>0: - return(pksinintv[0]) - elif len(pksinintv) >1: - pksinintv = pksinintv[np.argmin(abs(pksinintv - guessx))] - return(pksinintv) ## might be bad, not tested - # for px in fish[0]: - # distold = interval - # if px < guessx-interval: - # continue - # # print('in area', guessx-interval) - # if guessx-interval < px < guessx+interval: - # found = True - # dist = px-guessx - # if abs(dist) < abs(distold): - # distold = dist - # if px > guessx+interval: - # - # if found == True: - # print(guessx, dist) - # 
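# A minimal sketch of the low-pass noise rejection implemented in
# noisedelete_lowpass above: the trace is smoothed with a zero-phase
# Butterworth filter and bins whose mean squared deviation from the smoothed
# trace is too large are blanked out.  filtfilt stands in for the explicit
# forward/backward lfilter calls; the sketch assumes a NaN-free trace longer
# than a few samples, and names and the threshold are illustrative.
import numpy as np
from scipy.signal import butter, filtfilt

def drop_noisy_bins(x, binlen=10, fs=100.0, cutoff=25.0, max_mse=1.0):
    x = np.asarray(x, dtype=float)
    b, a = butter(1, cutoff / (fs / 2.0), btype='low')
    smooth = filtfilt(b, a, x)                    # zero-phase low-pass
    n = (x.size // binlen) * binlen
    mse = np.mean((x[:n] - smooth[:n]).reshape(-1, binlen) ** 2, axis=1)
    y = x.copy()
    y[:n][np.repeat(mse > max_mse, binlen)] = np.nan   # blank deviating bins
    return y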
time.sleep(5) - # return guessx + dist - # - # else: - # - # break - return None - - - -def fill_holes(fishes): #returns peakx, peaky, peakheight # Fills holes that seem to be missed peaks in peakarray with fake (X/Y/height)-Peaks - retur = [] - lost = [] - - #print('fill_holes fishes', fishes) - - for cl, fish in enumerate(fishes): - #print('Step2: Fish', cl) - fishisi = np.diff(fish[0]) - mark = np.zeros_like(fishisi) - isi = 0 - #print('mark', mark) - # print('fishisi' , fishisi) - #find zigzag: - c=0 - c0= 0 - n=0 - for i, newisi in enumerate(fishisi): - # print(newisi, isi) - if abs(newisi - isi)>0.15*isi: ## ZigZag-Detection : actually peaks of two classes in one class - leads to overlapping frequencys which shows in a zigzag pattern - if (newisi > isi) != (fishisi[i-1] > isi): - c+=1 - # print(abs(newisi - isi), 'x = ', fish[i].x) - c0+=1 - elif c > 0: - n += 1 - if n == 6: - if c > 6: - # print ('zigzag x = ', fish['x'][i-6-c0], fish['x'][i-6]) - mark[i-6-c0:i-6]= -5 - c = 0 - c0=0 - n = 0 - - #if c > 0: - # print(i, c) - # if c == 6: - # print('zigzag!') - isi = newisi - isi = 0 - for i, newisi in enumerate(fishisi): ## fill holes of up to 3 Peaks # Changed to: Only up to 1 Peak because : Holes might be intended for communicational reasons - #print('mark: ' , mark) - if mark[i] == -5: continue - if i+2 >= len(fishisi): - continue - if (2.2*isi > newisi > 1.8*isi) and (1.5*isi>fishisi[i+1] > 0.5*isi) : - mark[i] = 1 - isi = newisi - # print('found 1!' , i) - elif (2.2*isi > newisi > 1.8*isi) and (2.2*isi> fishisi[i+1] > 1.8*isi) and (1.5*isi > fishisi[i+2] > 0.5*isi): - mark[i] = 1 - isi = isi - #elif 3.4*isi > newisi > 2.6*isi and 1.5*isi > fishisi[i+1] > 0.5*isi: - # mark[i] = 2 - - elif (0.6* isi > newisi > 0): - # print('-1 found', i ) - if mark[i] ==0 and mark[i+1] ==0 and mark[i-1]==0 : - # isi newisi - # continue - # print('was not already set') - if fishisi[i-2] > isi < fishisi[i+1]: - mark[i] = -1 - # print('-1') - elif isi > fishisi[i+1] < fishisi[i+2]: - mark[i+1] = -1 - # print('-1') - isi = newisi - x = [] - y = [] - h = [] - x_lost=[] - y_lost=[] - h_lost=[] - # print('filledmarks: ', mark) - for i, m in enumerate(mark): - if m == -1 : - # print('-1 at x = ', fish['x'][i]) - continue - if m == -5: - x_lost.append(fish[0][i]) - y_lost.append(fish[1][i]) - h_lost.append(fish[2][i]) - x.append(fish[0][i]) - y.append(fish[1][i]) - h.append(fish[2][i]) - continue - x.append(fish[0][i]) - y.append(fish[1][i]) - h.append(fish[2][i]) - if m == 1: - # print('hofly added peak at x = ' , fish['x'][i]) - x.append(fish[0][i] + fishisi[i-1]) - y.append( 0.5*(fish[1][i]+fish[1][i+1])) - h.append(0.5*(fish[2][i]+fish[2][i+1])) - elif m== 2: - x.append(fish[0][i] + fishisi[i]) - y.append( 0.5*(fish[1][i]+fish[1][i+1])) - h.append(0.5*(fish[2][i]+fish[2][i+2])) - x.append(fish[0][i] + 2*fishisi[i-1]) - y.append( 0.5*(fish[1][i]+fish[1][i+2])) - h.append(0.5*(fish[2][i]+fish[2][i+2])) - # print('added at x = ', fish[0][i] + fishisi[i]) - x = np.array(x) - y= np.array(y) - h = np.array(h) - x_lost = np.array(x_lost) - y_lost = np.array(y_lost) - h_lost = np.array(h_lost) - #print('retur', x, y, h) - retur.append([x,y,h]) - lost.append([x_lost,y_lost,h_lost]) - # filledpeaks =np.array(filledpeaks) - # print(filledpeaks.shape) - # filledpeaks. 
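# A condensed sketch of the class-merging idea implemented in reduce_classes
# further below: classes are sorted by their first pulse time, and a class is
# appended to an existing track whenever it starts only after that track has
# ended; otherwise it opens a new track.  Names are illustrative assumptions,
# and the original additionally checks for longer overlapping follow-up
# classes before merging.
import numpy as np

def merge_nonoverlapping(classes):
    # classes: list of (attributes, n) arrays whose first row holds pulse times
    classes = sorted(classes, key=lambda c: c[0][0])
    tracks = []
    for c in classes:
        for i, t in enumerate(tracks):
            if t[0][-1] < c[0][0]:               # class starts after track ends
                tracks[i] = np.concatenate((t, c), axis=1)
                break
        else:
            tracks.append(c)
    return tracks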
- return retur, lost - - -# eods[-len(thisblock_eods[:,]):] = thisblock_eods -# eods = np.memmap("eods_"+filename[:-3]+"npy", dtype='float32', mode='r+', shape=(4,eods_len)) - #fp = np.memmap(filepath[:-len(filename)]+"eods_"+filename[:-3]+"npy", dtype='float32', mode='r+', shape=(4,len(thisblock_eods[:,]))) - #nix print( b.data_arrays) - # for cl in np.unique(cllist): - # currentfish_x = x[:][cllist == cl] - # currentfish_y = y[:][cllist == cl] - # currentfish_h d= x[:][cllist == cl] - #nix try: - #nix xpositions[cl] = b.create_data_array("f%d_eods" %cl, "spiketimes", data = currentfish_x) - #nix xpositions[cl].append_set_dimension() - #nix # thisfish_eods = b.create_multi_tag("f%d_eods_x"%cl, "eods.position", xpositions[cl]) - #nix # thisfish_eods.references.append(nixdata) - #nix except nix.pycore.exceptions.exceptions.DuplicateName: - #nix - #nix xpositions[cl].append(currentfish_x) - - - #thisfish_eods.create_feature(y, nix.LinkType.Indexed) - #b.create_multi_tag("f%d_eods_y"%cl, "eods.y", positions = y) - #b.create_multi_tag("f%d_eods_h"%cl, "eods.amplitude", positions = h) - #thisfish_eods.create_feature - - #nix file.close() - # Save Data - # Needed: - # Meta: Starttime, Startdate, Length - # x, y, h, cl, difftonextinclass -> freq ? , - - # Later: Find "Nofish" - # Find "Twofish" - # Find "BadData" - # Find "Freqpeak" - # ? Find "Amppeak" - # - - # bigblock = np.array(bigblock) - # x=xarray(bigblock) - # y=yarray(bigblock) - # cl=clarray(bigblock) - - - #nix file = nix.File.open(file_name, nix.FileMode.ReadWrite) - #nix b = file.blocks[0] - #nix nixdata = b.data_arrays[0] - #nix cldata = [] - #nix print(classes) - #nix print(b.data_arrays) - #nix for i in range(len(np.unique(classes))): - #nix cldata.append(b.data_arrays[i+1]) - - - # for cl in - - # for cl in - # x = thisfish_eods - - - #nix file.close() - - - -def reduce_classes(npFishes): - offtimeclasses = [] - for i, fish in enumerate(npFishes): - fish = np.array(fish) - #print(fish[0]) - # print('nüFishes before and after command') - # print('bef', npFishes[i][0][0]) - # print(fish[:,:][:,np.where(~np.isnan(fish[0]))].reshape(4,-1)) - npFishes[i] = fish[:,:][:,np.where(~np.isnan(fish[0]))][:,0] - # print('after', npFishes[i][0][0]) - if len(npFishes[i][0]) <= 100: - offtimeclasses.append(i) - #print('delete class ', i) - #print('Len offtime vs len Fishes', len(offtimeclasses), len(npFishes)) - for index in sorted(offtimeclasses, reverse=True): - del npFishes[index] - #print('npFishes to check features', npFishes[0][3]) - srt_beg = sort_beginning(npFishes) - # print(len(npFishes[0])) - # print(len(srt_beg)) - #srt_end = sort_ending(npFishes) - if len(srt_beg) >= 1: - reduced = [] - reduced.append(srt_beg[0]) - #for i, fish in enumerate(srt_beg): - #print(len(srt_beg)) - #print('reducing classes') - for i in range(1, len(srt_beg)): - #print('.', end = '') - cl = 0 - reducedlen_beg = len(reduced) - while cl < reducedlen_beg: - cond1 = reduced[cl][0][-1] < srt_beg[i][0][0] - cond2 = False - nxt=i+1 - while nxt < len(srt_beg) and srt_beg[i][0][-1] > srt_beg[nxt][0][0]: #part ends after another part started (possibly in the other part. 
- if len(srt_beg[nxt][0]) > len(srt_beg[i][0]):# -> lencheck to pick longer part) - reduced.append(srt_beg[i]) - # print('case1') - break - nxt+=1 - else: - cond2 = True - # print('lenreduced', len(reduced), len(srt_beg)) - #print(i, cl, cond1, cond2 ) - if cond1 and cond2: - #print(reduced[cl].shape, srt_beg[i].shape) - reduced[cl] = np.concatenate((reduced[cl],srt_beg[i]), axis=1) - #print(len(reduced[cl][0]), len(srt_beg[i][0])) - cl+=1 - break - if cond2 == False: - break - cl+=1 - else: - reduced.append(srt_beg[i]) - - #print('len red', len(reduced)) - #print(len(npFishes[0])) - return reduced - else: - return [] - -def sort_beginning(npFishes): - srted = npFishes - srted.sort(key=lambda x: x[0][0]) - #for i in srted[0][0]: - # print(i) - - return srted - -def sort_ending(npFishes): - srted = npFishes[:] - srted.sort(key=lambda x: x[0][-1]) - return srted - -def noisedscrd(fishes): - for fish in fishes: - print(np.std(fish[2])) - - -def plot_ontimes(ontime): - plt.fill_between(range(len(ontime[0])), ontime[0], color = '#1e2c3c', label = 'close') #'#324A64' - plt.fill_between(range(len(ontime[1])), ontime[1], color = '#324A64', label = 'nearby') - plt.fill_between(range(len(ontime[2])), ontime[2], color = '#8ea0b4', label = 'far') - plt.xlabel('seconds') - plt.ylabel('position') - plt.legend(loc = 1) - plt.ylim(0,1.5) - # plt.xlim(0,len()) - plt.show() - - - -if __name__ == '__main__': - main() diff --git a/thunderfish/DextersThunderfishAddition/let_them_rum_allfiles_DexThunder.sh b/thunderfish/DextersThunderfishAddition/let_them_rum_allfiles_DexThunder.sh deleted file mode 100644 index c04b68a9..00000000 --- a/thunderfish/DextersThunderfishAddition/let_them_rum_allfiles_DexThunder.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -cat allfiles.txt | while read -r line - do - python3 -W ignore DextersThunderfishAddition.py "${line:0:-4}/$line" - done diff --git a/thunderfish/DextersThunderfishAddition/run_example.sh b/thunderfish/DextersThunderfishAddition/run_example.sh deleted file mode 100644 index 14e372a9..00000000 --- a/thunderfish/DextersThunderfishAddition/run_example.sh +++ /dev/null @@ -1,5 +0,0 @@ -python3 -W ignore DextersThunderfishAddition.py ~/40320L01_G11/40320L01_G11.WAV 0 1 1 3700 4000 - -#python3 analyseDexRefactorShort.py ~/70319L01_F1/70319L01_F1.WAV 0 1 1 #3700 4000 -#python3 analyseDexRefactorShort.py ~/40320L01_G11/40320L01_G11.WAV 0 1 1 #3700 4000 -#python3 analyseDexRefactor.py ~/40320L01_G11/40320L01_G11.WAV 0 1 1 0 100 diff --git a/thunderfish/DextersThunderfishAddition/DextersThunderfishAddition.py b/thunderfish/pulsetracker.py similarity index 89% rename from thunderfish/DextersThunderfishAddition/DextersThunderfishAddition.py rename to thunderfish/pulsetracker.py index 44e6ef32..4276d128 100644 --- a/thunderfish/DextersThunderfishAddition/DextersThunderfishAddition.py +++ b/thunderfish/pulsetracker.py @@ -1,31 +1,28 @@ +""" +by Dexter Frueh +""" + import sys import numpy as np import copy -#from scipy.stats import gmainan from scipy import stats from scipy import signal from scipy import optimize import matplotlib -from fish import ProgressFish +#from fish import ProgressFish import matplotlib.pyplot as plt -#from thunderfish.dataloader import open_data -#from thunderfish.peakdetection import detect_main +from thunderfish.dataloader import open_data +from thunderfish.eventdetection import detect_peaks from scipy.interpolate import interp1d -from scipy.signal import savgol_filter from sklearn.preprocessing import StandardScaler from sklearn.decomposition 
import PCA from sklearn.cluster import DBSCAN -#from sklearn import metrics -#from mpl_toolkits.mplot3d import Axes3D from sklearn.cluster import AgglomerativeClustering from collections import deque import ntpath -import nixio as nix import time import os from shutil import copy2 -from ownDataStructures import Peak, Tr, Peaklist -from IPython import embed def makeeventlist(main_event_positions,side_event_positions,data,event_width=20): """ @@ -98,6 +95,7 @@ def makeeventlist(main_event_positions,side_event_positions,data,event_width=20) # generate return array and discard all events that are not marked as real EOD_events = np.array([main_x, main_y, main_h], dtype = np.float)[:,main_real==1] return EOD_events + def discardnearbyevents(event_locations, event_heights, min_distance): """ Given a number of events with given location and heights, returns a selection @@ -149,11 +147,13 @@ def discardnearbyevents(event_locations, event_heights, min_distance): print('Warning: unusual many discarding steps needed, unusually dense events') pass return event_indices, event_locations, event_heights + def crosscorrelation(sig, data): 'returns crosscorrelation of two arrays, the first array should have a length equal to or smaller than the second array.' return signal.fftconvolve(data, sig[::-1], mode='valid') + def interpol(data, kind): - ''' + """ interpolates the given data using scipy interpolation python package Parameters @@ -167,13 +167,14 @@ def interpol(data, kind): ------- interpolation: function - ''' + """ width = len(data) x = np.linspace(0, width-1, num = width, endpoint = True) - return interp1d(x, data[0:width], kind , assume_sorted=True) + #return interp1d(x, data[0:width], kind, assume_sorted=True) + return interp1d(x, data[0:width], kind) def interpolated_array(data, kind, int_fact): - ''' + """ returns an interpolated array of the given dataarray. Parameters @@ -190,11 +191,11 @@ def interpolated_array(data, kind, int_fact): ------- interpolated array: array - ''' + """ return interpol(data,kind)(np.arange(0, len(data)-1, 1/int_fact)) def cut_snippets(data,event_locations,cut_width,int_met="linear",int_fact=10,max_offset = 1.5): - ''' + """ cuts intervals from a data array, interpolates and aligns them and returns them in a list Parameters @@ -221,7 +222,7 @@ def cut_snippets(data,event_locations,cut_width,int_met="linear",int_fact=10,max aligned_snips: twodimensional nparray the processed intervals (interval#,intervallen) - ''' + """ snippets = [] cut_width = [-cut_width, cut_width] alignwidth = int(np.ceil((max_offset) * int_fact)) @@ -330,11 +331,23 @@ def cluster_events(features, events, order, eps, min_samples, takekm, method='DB events = np.append(events,[labels], axis = 0) return events +class Peaklist(object): + def __init__(self, peaklist): + self.list = peaklist + self.lastofclass = {} + self.lastofclassx = {} + self.classesnearby = [] + self.classesnearbyx = [] + self.classesnearbypccl = [] + self.classlist = [] + self.classamount = 0 + self.shapes = {} + def connect_blocks(oldblock): - ''' + """ used to connect blocks. transfers data from the previous analysis block to the current block - ''' + """ newblock = Peaklist([]) newblock.lastofclass = oldblock.lastofclass newblock.lastofclassx = oldblock.lastofclassx @@ -346,10 +359,10 @@ def connect_blocks(oldblock): return newblock def alignclusterlabels(labels, peaklist, peaks, data='test'): - ''' + """ used to connect blocks. 
changes the labels of clusters in the current block to fit with the labels of the previous block - ''' + """ overlapamount = len(peaks[:,peaks[0]<30000]) if overlapamount == 0: return None @@ -549,9 +562,9 @@ def ampwalkclassify3_refactor(peaks,peaklist): return peaks, peaklist def discard_wave_pulses(peaks, data): - ''' + """ discards events from a pulse_event list which are unusally wide (wider than a tenth of the inter pulse interval), which indicates a wave-type EOD instead of a pulse type - ''' + """ deleteclasses = [] for cl in np.unique(peaks[3]): peaksofclass = peaks[:,peaks[3] == cl] @@ -576,9 +589,9 @@ def discard_wave_pulses(peaks, data): return peaks def plot_events_on_data(peaks, data): - ''' + """ plots the detected events onto the data timeseries. If the events are classified, the classes are plotted in different colors and the class -1 (not belonging to a cluster) is plotted in black - ''' + """ plt.plot(range(len(data)),data, color = 'black') if len(peaks)>3: classlist = np.array(peaks[3],dtype=np.int) @@ -601,9 +614,9 @@ def plot_events_on_data(peaks, data): plt.close() def discard_short_classes(events, minlen): - ''' + """ returns all events despite events which are in classes with less than minlen members - ''' + """ classlist = events[3] smallclasses = [cl for cl in np.unique(classlist) if len(classlist[classlist == cl]) < @@ -618,6 +631,7 @@ def path_leaf(path): ntpath.basename("a/b/c") head, tail = ntpath.split(path) return tail or ntpath.basename(head) + def save_EOD_events_to_npmmp(EOD_Events,eods_len,startblock,datasavepath,mmpname='eods.npmmp'): n_EOD_Events = len(EOD_Events[0]) savepath = datasavepath+"/"+mmpname @@ -631,12 +645,13 @@ def save_EOD_events_to_npmmp(EOD_Events,eods_len,startblock,datasavepath,mmpname 'float64', mode='r+', offset = dtypesize*eods_len*4, shape=(4,n_EOD_Events), order = 'F') eods[:] = EOD_Events + def discard_small_EODs(EOD_Events, ultimate_threshold): return EOD_Events[:,np.where(EOD_Events[2]>ultimate_threshold)] return np.where(EOD_Events[2]>ultimate_threshold) -def analyze_pulse_data(filepath,absolutepath=True, deltat=30, thresh=0.04, starttime = 0, endtime = 0, savepath = False,save=False, npmmp = False, plot_steps=False, plot_result=False): - ''' +def analyze_pulse_data(filepath, deltat=30, thresh=0.04, starttime = 0, endtime = 0, savepath = False,save=False, npmmp = False, plot_steps=False, plot_result=False): + """ analyzes timeseries of a pulse fish EOD recording Parameters @@ -677,30 +692,8 @@ def analyze_pulse_data(filepath,absolutepath=True, deltat=30, thresh=0.04, start ------- eods: numpy array 2D numpy array. first axis: attributes of an EOD (x (datapoints), y (recorded voltage), height (difference from maximum to minimum), class), second axis: EODs in chronological order. 
- ''' - import sys - import numpy as np - import copy - from scipy.stats import gmean - from scipy import stats - from scipy import signal - from scipy import optimize - import matplotlib - from fish import ProgressFish - import matplotlib.pyplot as plt - from thunderfish.dataloader import open_data - from thunderfish.peakdetection import detect_peaks - from scipy.interpolate import interp1d - from scipy.signal import savgol_filter - from collections import deque - import ntpath - import nixio as nix - import time - import os - from shutil import copy2 - from ownDataStructures import Peak, Tr, Peaklist - import DextersThunderfishAddition as dta - from IPython import embed + """ + # parameters for the analysis thresh = 0.04 # minimal threshold for peakdetection peakwidth = 20 # width of a peak and minimal distance between two EODs @@ -713,10 +706,6 @@ def analyze_pulse_data(filepath,absolutepath=True, deltat=30, thresh=0.04, start #starttime = 0 #endtime = 0 #timegiven = 0 - home = os.path.expanduser('~') - if absolutepath: - filepath = home+ '/'+ filepath - #os.chdir(home) #save = int(save) #plot_steps = int(plot_steps) starttime = int(starttime) @@ -729,7 +718,7 @@ def analyze_pulse_data(filepath,absolutepath=True, deltat=30, thresh=0.04, start filename = path_leaf(filepath) eods_len = 0 if savepath==False: - datasavepath = home+'/'+filename[:-4] + datasavepath = filename[:-4] elif savepath==True: datasavepath = input('With the option npmmp enabled, a numpy memmap will be saved to: ').lower() else: datasavepath=savepath @@ -762,41 +751,42 @@ def analyze_pulse_data(filepath,absolutepath=True, deltat=30, thresh=0.04, start print('blockamount: ' , blockamount) progress = 0 print(progress, '%' , flush = True, end = " ") - fish = ProgressFish(total = blockamount) + #fish = ProgressFish(total = blockamount) for idx in range(0, blockamount): blockdata = data[idx*nblock:(idx+1)*nblock] if progress < (idx*100 //blockamount): progress = (idx*100)//blockamount progressstr = ' Filestatus: ' - fish.animate(amount = idx, dexextra = progressstr) + #fish.animate(amount = idx, dexextra = progressstr) pk, tr = detect_peaks(blockdata, thresh) troughs = tr if len(pk) > 3: - peaks = dta.makeeventlist(pk,tr,blockdata,peakwidth) - peakindices, peakx, peakh = dta.discardnearbyevents(peaks[0],peaks[1],peakwidth) + peaks = makeeventlist(pk,tr,blockdata,peakwidth) + peakindices, peakx, peakh = discardnearbyevents(peaks[0],peaks[1],peakwidth) peaks = peaks[:,peakindices] if len(peaks) > 0: if idx > startblock: - peaklist = dta.connect_blocks(peaklist) + peaklist = connect_blocks(peaklist) else: peaklist = Peaklist([]) - aligned_snips = dta.cut_snippets(blockdata,peaks[0], 15, int_met = "cubic", int_fact = 10,max_offset = 1.5) - pcs = dta.pc(aligned_snips)#pc_refactor(aligned_snips) + aligned_snips = cut_snippets(blockdata,peaks[0], 15, int_met = "cubic", int_fact = 10,max_offset = 1.5) + print(aligned_snips.shape) + pcs = pc(aligned_snips)#pc_refactor(aligned_snips) order = 5 minpeaks = 3 if deltat < 2 else 10 - labels = dta.cluster_events(pcs, peaks, order, 0.4, minpeaks, False, method = 'DBSCAN') + labels = cluster_events(pcs, peaks, order, 0.4, minpeaks, False, method = 'DBSCAN') peaks = np.append(peaks,[labels], axis = 0) - #dta.plot_events_on_data(peaks, blockdata) + #plot_events_on_data(peaks, blockdata) num = 1 if idx > startblock: - dta.alignclusterlabels(labels, peaklist, peaks,data=blockdata) - peaks, peaklist = dta.ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude + 
alignclusterlabels(labels, peaklist, peaks,data=blockdata) + peaks, peaklist = ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude minlen = 6 - peaks = dta.discard_short_classes(peaks, minlen) + peaks = discard_short_classes(peaks, minlen) if len(peaks[0]) > 0: - peaks = dta.discard_wave_pulses(peaks, blockdata) + peaks = discard_wave_pulses(peaks, blockdata) if plot_steps == True: - dta.plot_events_on_data(peaks, blockdata) + plot_events_on_data(peaks, blockdata) pass peaklist.len = nblock worldpeaks = np.copy(peaks) @@ -815,7 +805,7 @@ def analyze_pulse_data(filepath,absolutepath=True, deltat=30, thresh=0.04, start all_eods = np.concatenate((all_eods,thisblock_eods),axis = 1) else: all_eods = thisblock_eods - #dta.plot_events_on_data(all_eods,data) + #plot_events_on_data(all_eods,data) print('returnes analyzed EODS. Calculate frequencies using all of these but discard the data from the EODS within the lowest few percent of amplitude') if npmmp: all_eods = np.memmap(datasavepath+'/'+mmpname, dtype='float64', mode='r+', shape=(4,eods_len), order = 'F') @@ -862,31 +852,8 @@ def analyze_long_pulse_data_file(filepath,save=0,plot_steps=0,new=1,starttime = # - new : if True, do a new analysis of the recording, even if there # is an existing analyzed .npy file with the right name. # - import sys - import numpy as np - import copy - from scipy.stats import gmean - from scipy import stats - from scipy import signal - from scipy import optimize - import matplotlib - from fish import ProgressFish - import matplotlib.pyplot as plt - from thunderfish.dataloader import open_data - from thunderfish.peakdetection import detect_peaks - from scipy.interpolate import interp1d - from scipy.signal import savgol_filter - from collections import deque - import ntpath - import nixio as nix - import time - import os - from shutil import copy2 - from ownDataStructures import Peak, Tr, Peaklist - import DextersThunderfishAddition as dta - from IPython import embed + # parameters for the analysis - deltat = 30.0 # seconds of buffer size thresh = 0.04 # minimal threshold for peakdetection peakwidth = 20 # width of a peak and minimal distance between two EODs @@ -949,7 +916,7 @@ def analyze_long_pulse_data_file(filepath,save=0,plot_steps=0,new=1,starttime = print('blockamount: ' , blockamount) progress = 0 print(progress, '%' , flush = True, end = " ") - fish = ProgressFish(total = blockamount) + #fish = ProgressFish(total = blockamount) # blockwise analysis for idx in range(0, blockamount): @@ -958,43 +925,43 @@ def analyze_long_pulse_data_file(filepath,save=0,plot_steps=0,new=1,starttime = if progress < (idx*100 //blockamount): progress = (idx*100)//blockamount progressstr = ' Filestatus: ' - fish.animate(amount = idx, dexextra = progressstr) + #fish.animate(amount = idx, dexextra = progressstr) #---analysis----------------------------------------------------------------------- # step1: detect peaks in timeseries pk, tr = detect_peaks(blockdata, thresh) troughs = tr # continue with analysis only if multiple peaks are detected if len(pk) > 3: - peaks = dta.makeeventlist(pk,tr,blockdata,peakwidth) + peaks = makeeventlist(pk,tr,blockdata,peakwidth) - #dta.plot_events_on_data(peaks, blockdata) - peakindices, peakx, peakh = dta.discardnearbyevents(peaks[0],peaks[1],peakwidth) + #plot_events_on_data(peaks, blockdata) + peakindices, peakx, peakh = discardnearbyevents(peaks[0],peaks[1],peakwidth) peaks = peaks[:,peakindices] if len(peaks) > 0: # used to connect the results of the current block with the 
previous if idx > startblock: - peaklist = dta.connect_blocks(peaklist) + peaklist = connect_blocks(peaklist) else: peaklist = Peaklist([]) - aligned_snips = dta.cut_snippets(blockdata,peaks[0], 15, int_met = "cubic", int_fact = 10,max_offset = 1.5) - pcs = dta.pc(aligned_snips)#pc_refactor(aligned_snips) + aligned_snips = cut_snippets(blockdata,peaks[0], 15, int_met = "cubic", int_fact = 10,max_offset = 1.5) + pcs = pc(aligned_snips)#pc_refactor(aligned_snips) order = 5 minpeaks = 3 if deltat < 2 else 10 - labels = dta.cluster_events(pcs, peaks, order, 0.4, minpeaks, False, method = 'DBSCAN') + labels = cluster_events(pcs, peaks, order, 0.4, minpeaks, False, method = 'DBSCAN') peaks = np.append(peaks,[labels], axis = 0) - #dta.plot_events_on_data(peaks, blockdata) + #plot_events_on_data(peaks, blockdata) num = 1 if idx > startblock: - dta.alignclusterlabels(labels, peaklist, peaks,data=blockdata) - peaks, peaklist = dta.ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude + alignclusterlabels(labels, peaklist, peaks,data=blockdata) + peaks, peaklist = ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude minlen = 6 # >=1 - peaks = dta.discard_short_classes(peaks, minlen) + peaks = discard_short_classes(peaks, minlen) if len(peaks[0]) > 0: - peaks = dta.discard_wave_pulses(peaks, blockdata) + peaks = discard_wave_pulses(peaks, blockdata) # plots the data part and its detected and classified peaks if plot_steps == True: - dta.plot_events_on_data(peaks, blockdata) + plot_events_on_data(peaks, blockdata) pass worldpeaks = np.copy(peaks) # change peaks location in the buffered part to the location relative to the @@ -1033,11 +1000,11 @@ def main(): # starttime = int(sys.argv[5]) # endtime = int(sys.argv[6]) - # #eods = analyze_pulse_data(sys.argv[1],starttime=0,endtime =50,save = 0, new = 1, plot_steps=0) - # #eods = analyze_pulse_data(sys.argv[1],starttime=0,endtime =50,save = 0, npmmp= True, plot_steps=0) - # #eods = analyze_pulse_data(sys.argv[1],save = 0, npmmp= True, plot_steps=0) + # #eods = analyze_pulse_data(sys.argv[1], starttime=0, endtime=50, save=0, new=1, plot_steps=0) + # #eods = analyze_pulse_data(sys.argv[1], starttime=0, endtime=50, save=0, npmmp=True, plot_steps=0) + # #eods = analyze_pulse_data(sys.argv[1], save=0, npmmp=True, plot_steps=0) print(sys.argv[1]) - eods = analyze_pulse_data(sys.argv[1],save = True, npmmp= True) + eods = analyze_pulse_data(sys.argv[1], save=True, npmmp=True) print(eods) # # colors = plt.get_cmap('jet')(np.linspace(0, 1.0, np.unique(eods[3])[-1]+1)) #len(np.unique(classlist))))
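#    A minimal usage sketch of the renamed module (assumptions: the package is
#    importable as `thunderfish` and 'recording.WAV' is a placeholder file name;
#    the keyword defaults shown are those of the analyze_pulse_data signature above):
#
#        from thunderfish.pulsetracker import analyze_pulse_data
#
#        # detect, cluster and track pulse-type EODs in 30 s analysis blocks
#        eods = analyze_pulse_data('recording.WAV', deltat=30, thresh=0.04,
#                                  save=False, npmmp=False, plot_steps=False)
#
#        # eods is a 2D array: rows are x (sample index), y (voltage), height, class;
#        # columns are the detected EODs in chronological order
#        print(eods.shape)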