diff --git a/.gitignore b/.gitignore index 7db0b162..da07e202 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ *.pdf *.txt .coverage +__pycache__ *~ diff --git a/thunderfish/DextersThunderfishAddition/DextersThunderfishAddition.py b/thunderfish/DextersThunderfishAddition/DextersThunderfishAddition.py new file mode 100644 index 00000000..44e6ef32 --- /dev/null +++ b/thunderfish/DextersThunderfishAddition/DextersThunderfishAddition.py @@ -0,0 +1,1055 @@ +import sys +import numpy as np +import copy +#from scipy.stats import gmainan +from scipy import stats +from scipy import signal +from scipy import optimize +import matplotlib +from fish import ProgressFish +import matplotlib.pyplot as plt +#from thunderfish.dataloader import open_data +#from thunderfish.peakdetection import detect_main +from scipy.interpolate import interp1d +from scipy.signal import savgol_filter +from sklearn.preprocessing import StandardScaler +from sklearn.decomposition import PCA +from sklearn.cluster import DBSCAN +#from sklearn import metrics +#from mpl_toolkits.mplot3d import Axes3D +from sklearn.cluster import AgglomerativeClustering +from collections import deque +import ntpath +import nixio as nix +import time +import os +from shutil import copy2 +from ownDataStructures import Peak, Tr, Peaklist +from IPython import embed + +def makeeventlist(main_event_positions,side_event_positions,data,event_width=20): + """ + Generate array of events that might be EODs of a pulse-type fish, using the location of peaks and troughs, + the data and an optional width of an supposed EOD-event. + The generated event-array contains location and height of such events. + The height of the events is calculated by its height-difference to nearby troughs and main events that have no side events in a range closer than event_width are discarded and not considered as EOD event. + + Parameters + ---------- + main_event_positions: array of int or float + Positions of the detected main events in the data time series. Either peaks or troughs. + side_event_positions: array of int or float + Positions of the detected side events in the data time series. The complimentary event to the main events. + data: array of float + The given data. + event_width: int or float, optional + + Returns + ------- + EOD_events: ndarray + 2D array containing data with 'np.float' type, size (number_of_properties = 3, number_of_events). + Generated and combined data of the detected events in an array with arrays of x, y and height along the first axis. + + """ + mainfirst = int((min(main_event_positions[0],side_event_positions[0])= 0 and r_side_ind < len(side_event_positions): + if min((l_distance),(r_distance)) > event_width: + r[...] = False + elif max((l_distance),(r_distance)) <= event_width: + h[...] = max(abs(y-l_side_y),abs(y-r_side_y)) #calculated using absolutes in case of for example troughs instead of peaks as main events + else: + if (l_distance)<(r_distance): # evaluated only when exactly one side event is out of reach of the event width. Then the closer event will be the correct event + h[...] = abs(y-l_side_y) + else: + h[...] = abs(y-r_side_y) + # check corner cases + elif l_side_ind == -1: + if r_distance > event_width: + r[...] = False + else: + h[...] = y-r_side_y + elif r_side_ind == len(side_event_positions): + if l_distance> event_width: + r[...] = False + else: + h[...] 
= y-l_side_y + # generate return array and discard all events that are not marked as real + EOD_events = np.array([main_x, main_y, main_h], dtype = np.float)[:,main_real==1] + return EOD_events +def discardnearbyevents(event_locations, event_heights, min_distance): + """ + Given a number of events with given location and heights, returns a selection + of these events where no event is closer than eventwidth to the next event. + Among neighboring events closer than eventwidth the event with smaller height + is discarded. + Used to discard sidepeaks in detected multiple peaks of single EOD-pulses and + only keep the largest event_height and the corresponding location as + representative of the whole EOD pulse. + + Parameters + ---------- + event_locations: array of int or float + Positions of the given events in the data time series. + event_heights: array of int or float + Heights of the given events, indices refer to the same events as in + event_locations. + min_distance: int or float + minimal distance between events before one of the events gets discarded. + + Returns + ------- + event_locations: array of int or float + Positions of the returned events in the data time series. + event_heights: array of int or float + Heights of the returned events, indices refer to the same events as in + event_locations. + + """ + unchanged = False + counter = 0 + event_indices = np.arange(0,len(event_locations)+1,1) + while unchanged == False:# and counter<=200: + x_diffs = np.diff(event_locations) + events_delete = np.zeros(len(event_locations)) + for i, diff in enumerate(x_diffs): + if diff < min_distance: + if event_heights[i+1] > event_heights[i] : + events_delete[i] = 1 + else: + events_delete[i+1] = 1 + event_heights = event_heights[events_delete!=1] + event_locations = event_locations[events_delete!=1] + event_indices = event_indices[np.where(events_delete!=1)[0]] + if np.count_nonzero(events_delete)==0: + unchanged = True + counter += 1 + if counter > 2000: + print('Warning: unusual many discarding steps needed, unusually dense events') + pass + return event_indices, event_locations, event_heights +def crosscorrelation(sig, data): + 'returns crosscorrelation of two arrays, the first array should have a length equal to or smaller than the second array.' + return signal.fftconvolve(data, sig[::-1], mode='valid') +def interpol(data, kind): + ''' + interpolates the given data using scipy interpolation python package + + Parameters + ---------- + data: array + + kind: string or int + (‘linear’, ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘previous’, ‘next’), or integer of order of spline interpolation to be used + + Returns + ------- + interpolation: function + + ''' + width = len(data) + x = np.linspace(0, width-1, num = width, endpoint = True) + return interp1d(x, data[0:width], kind , assume_sorted=True) + +def interpolated_array(data, kind, int_fact): + ''' + returns an interpolated array of the given dataarray. 
+ + Parameters + ---------- + data: array + + kind: string or int + (‘linear’, ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘previous’, ‘next’), or integer of order of spline interpolation to be used + + int_fact: int + factor by which the interpolated array is larger than the original array + + Returns + ------- + interpolated array: array + + ''' + return interpol(data,kind)(np.arange(0, len(data)-1, 1/int_fact)) + +def cut_snippets(data,event_locations,cut_width,int_met="linear",int_fact=10,max_offset = 1.5): + ''' + cuts intervals from a data array, interpolates and aligns them and returns them in a list + + Parameters + ---------- + data: array + + event_locations: array + + cut_width: [int, int] + lower and upper limit of the intervals relative to the event locations. + f.e. [-15,15] indicates an interval of 30 datapoints around each event location +s + int_met: string or int + method of interpolation. (‘linear’, ‘nearest’, ‘zero’, ‘slinear’, ‘quadratic’, ‘cubic’, ‘previous’, ‘next’), or integer of order of spline interpolation to be used + + int_fact: int + factor by which the interpolated array is larger than the original + + max_offset: float + maximal offset by which the interpolated intervals can be moved to be aligned with each other. offset relative to the datapoints of the original data. + + Returns + ------- + aligned_snips: twodimensional nparray + the processed intervals (interval#,intervallen) + + ''' + snippets = [] + cut_width = [-cut_width, cut_width] + alignwidth = int(np.ceil((max_offset) * int_fact)) + for pos in event_locations.astype('int'): + snippets.append(data[pos+cut_width[0]:pos+cut_width[1]]) + ipoled_snips = np.empty((len(snippets), (cut_width[1]-cut_width[0])*int_fact-int_fact)) + for i, snip in enumerate(snippets): + if len(snip) < ((cut_width[1]-cut_width[0])): + if i == 0: + snip = np.concatenate([np.zeros([((cut_width[1]-cut_width[0]) - len(snip))]),np.array(snip)]) + if i == len(snippets): + snip = np.concatenate([snip, np.zeros([((cut_width[1]-cut_width[0])-len(snip))])]) + else: + snip = np.zeros([(cut_width[1]-cut_width[0])]) + #f_interpoled = interpol(snip, int_met) #if len(snip) > 0 else np.zeros([(cut_width[1]-cut_width[0]-1)*int_fact ]) + interpoled_snip = interpolated_array(snip, int_met, 10)#f_interpoled(np.arange(0, len(snip)-1, 1/int_fact)) + intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) + if intsnipheight == 0: + intsnipheight = 1 + interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight + ipoled_snips[i] = interpoled_snip + mean = np.mean(ipoled_snips, axis = 0) + aligned_snips = np.empty((len(snippets), (cut_width[1]-cut_width[0])* int_fact-(2*alignwidth)-int_fact)) + for i, interpoled_snip in enumerate(ipoled_snips): + cc = crosscorrelation(interpoled_snip[alignwidth:-alignwidth], mean) + #cc = crosscorrelation(interpoled_snip[15 + 10*-cut_width[0]-10*7:-15+ -10*cut_width[1]+ 31], mean[10*-cut_width[0]-10*7:-10*cut_width[1]+31]) + offset = -alignwidth + np.argmax(cc) + aligned_snip = interpoled_snip[alignwidth-offset:-alignwidth-offset] if offset != -alignwidth else interpoled_snip[2*alignwidth:] + if len(aligned_snip[~np.isnan(aligned_snip)])>0: + aligned_snips[i] = aligned_snip + return aligned_snips + +def pc(dataset): + """ + Calculates the principal components of a dataset using the python module scikit-learn's principal component analysis + + Parameters + ---------- + dataset: ndarray + dataset of which the principal components are to be calculated. 
+        twodimensional array of shape (observations, features)
+
+    Returns
+    -------
+    pc_comp: ndarray
+        principal components of the dataset
+
+    """
+    pc_comp = PCA().fit_transform(dataset)
+    return pc_comp
+
+def dbscan(pcs, events, order, eps, min_samples, takekm):
+    """
+    Clusters the given observations by their spatial density in feature space using the
+    DBSCAN algorithm. For example, the first few principal components of the data can be
+    used as features for the classification.
+
+    Parameters
+    ----------
+    pcs: ndarray
+        features of the observations, shape (samples, features)
+    events: ndarray
+        the events to be clustered (currently not used inside this function)
+    order: int
+        number of feature columns (e.g. principal components) used for clustering
+    eps: float
+        maximum distance between two samples to be considered neighbors (DBSCAN parameter)
+    min_samples: int
+        minimum number of samples in a neighborhood for a core point (DBSCAN parameter)
+    takekm:
+        currently not used
+
+    Returns
+    -------
+    labels: ndarray
+        labels of the clusters of each observation
+
+    """
+    # pcs (samples, features)
+    # X (samples, features)
+    try:
+        X = pcs[:, :order]
+    except IndexError:
+        X = pcs[:, order]
+    # Compute DBSCAN
+    clusters = DBSCAN(eps, min_samples).fit(X)
+    #from sklearn.cluster import KMeans
+    core_samples_mask = np.zeros_like(clusters.labels_, dtype=bool)
+    core_samples_mask[clusters.core_sample_indices_] = True
+    labels = clusters.labels_
+    return labels
+
+def cluster_events(features, events, order, eps, min_samples, takekm, method='DBSCAN'):
+    """
+    clusters the given events using the given feature space and the clustering algorithm of choice.
+
+    Parameters
+    ----------
+    features: ndarray
+        features of the events, shape (samples, features)
+    events: ndarray
+        the events to be clustered
+    order, eps, min_samples, takekm:
+        parameters passed on to the clustering algorithm (see dbscan())
+    method: string, optional
+        clustering algorithm to use; currently only 'DBSCAN' is implemented
+
+    Returns
+    -------
+    labels: ndarray
+        cluster label of each event
+
+    """
+    # function could be made even more generic (dependent on the datatype of "events")
+    if method == 'DBSCAN':
+        labels = dbscan(features, events, order, eps, min_samples, takekm)
+    elif method == 'kMean':
+        # to be implemented
+        #labels = kmeans([])
+        pass
+    # appending the labels to the events is left to the caller:
+    # events = np.append(events, [labels], axis=0)
+    return labels
+
+def connect_blocks(oldblock):
+    '''
+    used to connect blocks.
+    transfers data from the previous analysis block to the current block
+    '''
+    newblock = Peaklist([])
+    newblock.lastofclass = oldblock.lastofclass
+    newblock.lastofclassx = oldblock.lastofclassx
+    newblock.classesnearby = oldblock.classesnearby
+    newblock.classesnearbypccl = oldblock.classesnearbypccl
+    newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx]
+    newblock.classamount = oldblock.classamount
+    newblock.len = oldblock.len
+    return newblock
+
+def alignclusterlabels(labels, peaklist, peaks, data='test'):
+    '''
+    used to connect blocks.
+    changes the labels of clusters in the current block to fit with the labels of the previous block
+    '''
+    overlapamount = len(peaks[:, peaks[0] < 30000])
+    if overlapamount == 0:
+        return None
+    old_peaklist = copy.deepcopy(peaklist)  # redundant
+    overlappeaks = copy.deepcopy(peaks[:, :overlapamount])
+    overlap_peaklist = copy.deepcopy(old_peaklist)
+    # overlappeaks = np.append(overlappeaks,[labels], axis = 0)
+    # classify the overlapping peaks again, with all waveform classes set to -1
+    overlappeaks[3] = [-1] * len(overlappeaks[0])
+    #overlap_peaklist = connect_blocks(old_peaklist)
+    overlap_peaklist.classesnearbypccl = [-1] * len(overlap_peaklist.classesnearbypccl)
+    classified_overlap = ampwalkclassify3_refactor(overlappeaks, overlap_peaklist)[0]
+    #plot_events_on_data(classified_overlap,data)
+    labeltranslator = {}
+    for cl in np.unique(classified_overlap[4]):
+        if len(labeltranslator) <= len(np.unique(labels)):
+            labelindex = np.where(classified_overlap[4] == cl)[0]
+            label = labels[labelindex]
+            labelindex = labelindex[np.where(label == stats.mode(label)[0])[0][0]]
+            newlabel = labels[labelindex]  # waveform label belonging to the class cl in the new block
+            try:
+                oldlabel_ind = old_peaklist.classesnearby.index(cl)
+                oldlabel = old_peaklist.classesnearbypccl[oldlabel_ind]
+                # oldlabel = old_peaklist.classesnearbypccl[::-1][old_peaklist.classesnearby[::-1].index(cl)]  # last label belonging to cl in the old block
+            except ValueError:
+                oldlabel = -2
+            if oldlabel not in labeltranslator:
+                labeltranslator[oldlabel] = newlabel
+    for lbl in old_peaklist.classesnearbypccl:
+        if lbl not in labeltranslator:
+            labeltranslator[lbl] = lbl
+    peaklist.classesnearbypccl = [labeltranslator[lbl] for lbl in peaklist.classesnearbypccl]
+
+def ampwalkclassify3_refactor(peaks, peaklist):
+    """
+    classifies peaks/EOD events into different classes by their amplitude.
+
+    Takes the list of peaks and the list of properties from the previous analysis block.
+    Classifies the single peaks in the order of their occurrence in time, based on their amplitude
+    and on their previously assigned waveform class (assigned by the method cluster_events on the
+    principal components of the snippets around the single peaks).
+
+    Method:
+    calculates the difference in amplitude between the current peak and the amplitude classes that
+    are nearby. Creates a new amplitude class if no class is close enough. Creates no new class if
+    the peak's waveform class is a noise class of the DBSCAN algorithm. Does not compare peaks of
+    different waveform classes.
+
+    -- can be used without prior waveform classes, resulting in a classification based solely on the
+    amplitude development; for this, all pccl classes need to be set to the same class beforehand --
not practical, but should be possible to + split up into more general functions + """ + classamount = peaklist.classamount + lastofclass = peaklist.lastofclass + lastofclassx = peaklist.lastofclassx + a=0 + elem = 0 + thresholder = [] + comperr = 1 + classesnearby = peaklist.classesnearby + classesnearbyx = peaklist.classesnearbyx + classesnearbypccl = peaklist.classesnearbypccl + classes = np.zeros((len(peaks[0]))) + pcclasses = peaks[3] + positions = peaks[0] + heights = peaks[2] + cl = 0 + maxdistance = 30000 # Max distance to possibly belong to the same class + factor = 1.6 # factor by which a peak fits into a class, f.E: classheight = 1 , factor = 2 => peaks accepted in range (0.5,2) + c=0 + for peaknum, p in enumerate(peaks.T): + if len(lastofclass) == 0: + lastofclass[1] = deque() + lastofclassx[1] = deque() + lastofclass[1].append(heights[peaknum]) + lastofclassx[1].append(positions[peaknum]) + classesnearby.append(1) + classesnearbyx.append(-1) + classesnearbypccl.append(pcclasses[peaknum]) + classes[peaknum] = 1 + classamount += 1 + continue + time1 = time.time() + for i, cl in enumerate(classesnearby): + if (positions[peaknum] - classesnearbyx[i]) > maxdistance: + classesnearby.pop(i) + classesnearbyx.pop(i) + classesnearbypccl.pop(i) + lastofclassisis = [] + for i in classesnearby: + lastofclassisis.append(np.median(np.diff(lastofclassx[i]))) + meanisi = np.mean(lastofclassisis) + if 32000 > 20*meanisi> 6000: + maxdistance = 20*meanisi + cl = 0 # 'No class' + comperr = 1 + clnrby = np.unique(classesnearby) + + for i in clnrby: + classmean = np.mean(lastofclass[i]) + logerror = np.abs(np.log2(heights[peaknum])-np.log2(classmean)) + abserror = np.abs(heights[peaknum]-classmean) + logthresh = np.log2(factor) + #relerror = error + relerror = logerror + + if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum] or pcclasses[peaknum] == -1:# or + if logerror < logthresh: ## SameClass-Condition + if relerror < comperr and (positions[peaknum]-classesnearbyx[classesnearby.index(i)])= 3: + lastofclass[cl].popleft() + if len(lastofclassx[cl]) >= 3: + lastofclassx[cl].popleft() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + else: # Add new class + cl = classamount+1 + classamount = cl + lastofclass[cl] = deque() + lastofclassx[cl] = deque() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + if len(classesnearby) >= 12: #kacke implementiert? 
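+                    # keep the bookkeeping of nearby classes bounded: once more than 12 classes
+                    # are tracked, the class whose most recent peak lies furthest back in time
+                    # (smallest x) is dropped from lastofclass/lastofclassx and from the
+                    # classesnearby lists below.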
+ minind = classesnearbyx.index(min(classesnearbyx)) + del lastofclass[classesnearby[minind]] + del lastofclassx[classesnearby[minind]] + classesnearby.pop(minind) + classesnearbyx.pop(minind) + classesnearbypccl.pop(minind) + # for ind, clnrby in enumerate(reversed(classesnearby)): + # classesnearbyx + # del lastofclass[classesnearby[ind]] + # # del lastofclassx[classesnearby[minind]] + # classesnearby.pop(minind) + # classesnearbyx.pop(minind) + try: + ind=classesnearby.index(cl) + classesnearbyx[ind] = positions[peaknum] + except ValueError: + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + else: + if cl != 0: + classes[peaknum] = cl + else: + cl = classamount+1 + classamount = cl + lastofclass[cl] = deque() + lastofclassx[cl] = deque() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + if len(classesnearby) >= 12: #kacke implementiert? + minind = classesnearbyx.index(min(classesnearbyx)) + del lastofclass[classesnearby[minind]] + del lastofclassx[classesnearby[minind]] + classesnearby.pop(minind) + classesnearbyx.pop(minind) + classesnearbypccl.pop(minind) + # for ind, clnrby in enumerate(reversed(classesnearby)): + # classesnearbyx + # del lastofclass[classesnearby[ind]] + # # del lastofclassx[classesnearby[minind]] + # classesnearby.pop(minind) + # classesnearbyx.pop(minind) + try: + ind=classesnearby.index(cl) + classesnearbyx[ind] = positions[peaknum] + except ValueError: + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + peaklist.lastofclass = lastofclass + peaklist.lastofclassx = lastofclassx + peaklist.classesnearby = classesnearby + peaklist.classesnearbyx = classesnearbyx + peaklist.classesnearbypccl = classesnearbypccl + peaklist.classlist = classes # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) + peaklist.classamount = classamount + peaks = np.append(peaks,classes[None,:], axis = 0) + return peaks, peaklist + +def discard_wave_pulses(peaks, data): + ''' + discards events from a pulse_event list which are unusally wide (wider than a tenth of the inter pulse interval), which indicates a wave-type EOD instead of a pulse type + ''' + deleteclasses = [] + for cl in np.unique(peaks[3]): + peaksofclass = peaks[:,peaks[3] == cl] + isi = np.diff(peaksofclass[0]) + isi_mean = np.mean(isi) + widepeaks = 0 + isi_tenth_area = lambda x, isi : np.arange(np.floor(x-0.1*isi),np.ceil(x+0.1*isi),1, dtype = np.int) + for p in peaksofclass.T: + data = np.array(data) + try: + for dp_around in data[isi_tenth_area(p[0],isi_mean)]: + if dp_around <= p[1]-p[2]: + break + except (IndexError,ValueError) as e: + pass + else: + widepeaks+=1 + if widepeaks > len(peaksofclass)*0.5: + deleteclasses.append(cl) + for cl in deleteclasses: + peaks = peaks[:,peaks[3]!=cl] + return peaks + +def plot_events_on_data(peaks, data): + ''' + plots the detected events onto the data timeseries. 
If the events are classified, the classes are plotted in different colors and the class -1 (not belonging to a cluster) is plotted in black.
+    '''
+    plt.plot(range(len(data)), data, color='black')
+    if len(peaks) > 3:
+        classlist = np.array(peaks[3], dtype=np.int)
+        if len(peaks) > 4:
+            classlist = np.array(peaks[4], dtype=np.int)
+        cmap = plt.get_cmap('jet')
+        colors = cmap(np.linspace(0, 1.0, 3000))  #len(np.unique(classlist))))
+        np.random.seed(22)
+        np.random.shuffle(colors)
+        colors = [colors[cl] for cl in np.unique(classlist)]
+        for cl, color in zip(np.unique(classlist), colors):
+            if cl == -1:
+                color = 'black'
+            peaksofclass = peaks[:, classlist == cl]
+            plt.plot(peaksofclass[0], peaksofclass[1], '.', color=color, ms=20)
+            plt.scatter(peaksofclass[0], peaksofclass[2])
+    else:
+        plt.scatter(peaks[0], peaks[1], color='red')
+    plt.show()
+    plt.close()
+
+def discard_short_classes(events, minlen):
+    '''
+    returns all events except those belonging to classes with fewer than minlen members
+    '''
+    classlist = events[3]
+    smallclasses = [cl for cl in np.unique(classlist) if len(classlist[classlist == cl]) < minlen]
+    delete = np.zeros(len(classlist))
+    for cl in smallclasses:
+        delete[classlist == cl] = 1
+    events = events[:, delete != 1]
+    return events
+
+def path_leaf(path):
+    ntpath.basename("a/b/c")
+    head, tail = ntpath.split(path)
+    return tail or ntpath.basename(head)
+def save_EOD_events_to_npmmp(EOD_Events, eods_len, startblock, datasavepath, mmpname='eods.npmmp'):
+    n_EOD_Events = len(EOD_Events[0])
+    savepath = datasavepath + "/" + mmpname
+    if startblock:
+        eods = np.memmap(savepath,
+                         dtype='float64', mode='w+',
+                         shape=(4, n_EOD_Events), order='F')
+    else:
+        dtypesize = 8  # float64 is 64 bit = 8 bytes long
+        eods = np.memmap(savepath, dtype='float64', mode='r+',
+                         offset=dtypesize*eods_len*4,
+                         shape=(4, n_EOD_Events), order='F')
+    eods[:] = EOD_Events
+def discard_small_EODs(EOD_Events, ultimate_threshold):
+    return EOD_Events[:, np.where(EOD_Events[2] > ultimate_threshold)]
+    #return np.where(EOD_Events[2] > ultimate_threshold)  # unreachable alternative return
+
+def analyze_pulse_data(filepath, absolutepath=True, deltat=30, thresh=0.04, starttime=0, endtime=0, savepath=False, save=False, npmmp=False, plot_steps=False, plot_result=False):
+    '''
+    analyzes the timeseries of a pulse fish EOD recording
+
+    Parameters
+    ----------
+    filepath: string
+        path to the WAV-file with the recorded timeseries
+
+    deltat: int, optional
+        duration of a single analysis block in seconds (recommended less than a minute, due to the principal component clustering of the EOD waveforms)
+
+    thresh: float, optional
+        minimum threshold for the peak detection (if computing frequencies, it is recommended to set it a bit lower than the desired threshold and to discard the EODs below the desired threshold only after the frequencies have been computed for each EOD)
+
+    starttime: int or str of int, optional
+        time into the data at which to start the analysis, in seconds.
+
+    endtime: int or str of int, optional
+        time into the data at which to end the analysis, in seconds, larger than starttime.
+
+    savepath: Boolean or str, optional
+        path to where results and intermediate results are saved, only needed if save or npmmp is True.
+ string to specify a relative path to the directory where results and intermediate results will bed + or False to use preset savepath, which is ~/filepath/ + or True to specify savepath as input when the script is running + + save: Boolean, optional + True to save the results into a npy file at the savepath + + npmmp: Boolean, optional + True to save intermediate results into a npmmp at the savepath, only recommended in case of memory overflow + + plot_steps: Boolean, optional + True to plot the results of each analysis block + + plot_results: Boolean, optional + True to plot the results of the final analysis. Not recommended for long recordings due to %TODO + + Returns + ------- + eods: numpy array + 2D numpy array. first axis: attributes of an EOD (x (datapoints), y (recorded voltage), height (difference from maximum to minimum), class), second axis: EODs in chronological order. + ''' + import sys + import numpy as np + import copy + from scipy.stats import gmean + from scipy import stats + from scipy import signal + from scipy import optimize + import matplotlib + from fish import ProgressFish + import matplotlib.pyplot as plt + from thunderfish.dataloader import open_data + from thunderfish.peakdetection import detect_peaks + from scipy.interpolate import interp1d + from scipy.signal import savgol_filter + from collections import deque + import ntpath + import nixio as nix + import time + import os + from shutil import copy2 + from ownDataStructures import Peak, Tr, Peaklist + import DextersThunderfishAddition as dta + from IPython import embed + # parameters for the analysis + thresh = 0.04 # minimal threshold for peakdetection + peakwidth = 20 # width of a peak and minimal distance between two EODs + # basic parameters for thunderfish.dataloader.open_data + verbose = 0 + channel = 0 + ultimate_threshold = thresh+0.01 + startblock = 0 + # timeinterval to analyze other than the whole recording + #starttime = 0 + #endtime = 0 + #timegiven = 0 + home = os.path.expanduser('~') + if absolutepath: + filepath = home+ '/'+ filepath + #os.chdir(home) + #save = int(save) + #plot_steps = int(plot_steps) + starttime = int(starttime) + endtime = int(endtime) + timegiven = False + if endtime > starttime>=0: + timegiven = True + peaks = np.array([]) + troughs = np.array([]) + filename = path_leaf(filepath) + eods_len = 0 + if savepath==False: + datasavepath = home+'/'+filename[:-4] + elif savepath==True: + datasavepath = input('With the option npmmp enabled, a numpy memmap will be saved to: ').lower() + else: datasavepath=savepath + + if save and (os.path.exists(datasavepath+"/eods8_"+filename[:-3]+"npy") or os.path.exists(datasavepath+"/eods5_"+filename[:-3]+"npy")): + print('there already exists an analyzed file, aborting. Change the code if you don\'t want to abort') + quit() + if npmmp: + #proceed = input('With the option npmmp enabled, a numpy memmap will be saved to ' + datasavepath + '. continue? 
[y/n] ').lower() + proceed = 'y' + if proceed != 'y': + quit() + # starting analysis + with open_data(filepath, channel, deltat, 0.0, verbose) as data: + + samplerate = data.samplerate + + # selected time interval + if timegiven == True: + parttime1 = starttime*samplerate + parttime2 = endtime*samplerate + data = data[parttime1:parttime2] + + #split data into blocks + nblock = int(deltat*samplerate) + if len(data)%nblock != 0: + blockamount = len(data)//nblock + 1 + else: + blockamount = len(data)//nblock + print('blockamount: ' , blockamount) + progress = 0 + print(progress, '%' , flush = True, end = " ") + fish = ProgressFish(total = blockamount) + for idx in range(0, blockamount): + blockdata = data[idx*nblock:(idx+1)*nblock] + if progress < (idx*100 //blockamount): + progress = (idx*100)//blockamount + progressstr = ' Filestatus: ' + fish.animate(amount = idx, dexextra = progressstr) + pk, tr = detect_peaks(blockdata, thresh) + troughs = tr + if len(pk) > 3: + peaks = dta.makeeventlist(pk,tr,blockdata,peakwidth) + peakindices, peakx, peakh = dta.discardnearbyevents(peaks[0],peaks[1],peakwidth) + peaks = peaks[:,peakindices] + if len(peaks) > 0: + if idx > startblock: + peaklist = dta.connect_blocks(peaklist) + else: + peaklist = Peaklist([]) + aligned_snips = dta.cut_snippets(blockdata,peaks[0], 15, int_met = "cubic", int_fact = 10,max_offset = 1.5) + pcs = dta.pc(aligned_snips)#pc_refactor(aligned_snips) + order = 5 + minpeaks = 3 if deltat < 2 else 10 + labels = dta.cluster_events(pcs, peaks, order, 0.4, minpeaks, False, method = 'DBSCAN') + peaks = np.append(peaks,[labels], axis = 0) + #dta.plot_events_on_data(peaks, blockdata) + num = 1 + if idx > startblock: + dta.alignclusterlabels(labels, peaklist, peaks,data=blockdata) + peaks, peaklist = dta.ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude + minlen = 6 + peaks = dta.discard_short_classes(peaks, minlen) + if len(peaks[0]) > 0: + peaks = dta.discard_wave_pulses(peaks, blockdata) + if plot_steps == True: + dta.plot_events_on_data(peaks, blockdata) + pass + peaklist.len = nblock + worldpeaks = np.copy(peaks) + worldpeaks[0] = worldpeaks[0] + (idx*nblock) + thisblock_eods = np.delete(worldpeaks,3,0) + if npmmp: + if idx == startblock: + if not os.path.exists(datasavepath): + os.makedirs(datasavepath) + mmpname = "eods_"+filename[:-3]+"npmmp" + # save the peaks of the current buffered part to a numpy-memmap on the disk + save_EOD_events_to_npmmp(thisblock_eods,eods_len,idx==startblock,datasavepath,mmpname) + eods_len += len(thisblock_eods[0]) + else: + if idx > 0: + all_eods = np.concatenate((all_eods,thisblock_eods),axis = 1) + else: + all_eods = thisblock_eods + #dta.plot_events_on_data(all_eods,data) + print('returnes analyzed EODS. 
Calculate frequencies using all of these but discard the data from the EODS within the lowest few percent of amplitude') + if npmmp: + all_eods = np.memmap(datasavepath+'/'+mmpname, dtype='float64', mode='r+', shape=(4,eods_len), order = 'F') + if save == 1: + path = filename[:-4]+"/" + if not os.path.exists(path): + os.makedirs(path) + if eods_len > 0: + np.save(datasavepath+"/eods8_"+filename[:-3]+"npy", all_eods) + print('Saved!') + else: + print('not saved') + return all_eods + +def analyze_long_pulse_data_file(filepath,save=0,plot_steps=0,new=1,starttime = 0, endtime = 0): + """ + analyzes timeseries of a pulse fish EOD recording + """ + # Script to detect and classify EODs in recordings of weakly electric pulse + # fish, Dexter Früh, 2018 + # + # results will be saved in workingdirectory/recording/ + # + # input: + # - [Recorded Timeseries] recording.WAV + # outputs(optional): + # - [Detected and Classified EODs] + # (Numpy Array with Shape (Number of EODs, 4 (Attributes of EODs)), + # with the EOD-Attributes + # - x-location of the EOD + # (time/x-coordinate/datapoint in recording) + # - y-location of the EOD + # (Amplitude of the positive peak of the pulse-EOD) + # - height of the EOD(largest distance between peak and through in the EOD) + # - class of the EOD + # eods_recording.npy + # - [plots of the results of each analyse step for each + # analysepart (timeinterval of length = deltat) of the recording] + # + # required command line arguments at function call + # - save : if True, save the results to a numpy file (possibly + # overwrite existing) + # - plot : if True, plot results in each analysestep + # - new : if True, do a new analysis of the recording, even if there + # is an existing analyzed .npy file with the right name. + # + import sys + import numpy as np + import copy + from scipy.stats import gmean + from scipy import stats + from scipy import signal + from scipy import optimize + import matplotlib + from fish import ProgressFish + import matplotlib.pyplot as plt + from thunderfish.dataloader import open_data + from thunderfish.peakdetection import detect_peaks + from scipy.interpolate import interp1d + from scipy.signal import savgol_filter + from collections import deque + import ntpath + import nixio as nix + import time + import os + from shutil import copy2 + from ownDataStructures import Peak, Tr, Peaklist + import DextersThunderfishAddition as dta + from IPython import embed + # parameters for the analysis + + deltat = 30.0 # seconds of buffer size + thresh = 0.04 # minimal threshold for peakdetection + peakwidth = 20 # width of a peak and minimal distance between two EODs + # basic parameters for thunderfish.dataloader.open_data + verbose = 0 + channel = 0 + ultimate_threshold = thresh+0.01 + startblock = 0 + # timeinterval to analyze other than the whole recording + #starttime = 0 + #endtime = 0 + #timegiven = 0 + home = os.path.expanduser('~') + os.chdir(home) + new = int(sys.argv[4]) + save = int(sys.argv[2]) + plot = int(sys.argv[3]) + starttime = int(starttime) + endtime = int(endtime) + timegiven = False + if endtime > starttime>=0: + timegiven = True + peaks = np.array([]) + troughs = np.array([]) + filename = path_leaf(filepath) + datasavepath = filename[:-4] + proceed = input('Currently operates in home directory. If given a pulsefish recording filename.WAV, then a folder filename/ will be created in the home directory and all relevant files will be stored there. continue? 
[y/n] ').lower() + if proceed != 'y': + quit() + if not os.path.exists(datasavepath): + os.makedirs(datasavepath) + if save == 1: + print('files will be saved to: ', datasavepath) + eods_len = 0 + # starting analysis + if new == 1 or not os.path.exists(filename[:-4]+"/eods5_"+filename[:-3]+"npy"): + if filepath != home+ '/'+ datasavepath+'/'+filename: + print(filepath, datasavepath+'/'+filename) + proceed = input('Copy datafile to '+ datasavepath+ ' where all the other files will be stored? [y/n] ').lower() + if proceed == 'y': + copy2(filepath,datasavepath) + # import data + with open_data(filepath, channel, deltat, 0.0, verbose) as data: + samplerate = data.samplerate + nblock = int(deltat*data.samplerate) + + # selected time interval + if timegiven == True: + parttime1 = starttime*samplerate + parttime2 = endtime*samplerate + data = data[parttime1:parttime2] + + #split data into blocks + if len(data)%nblock != 0: + blockamount = len(data)//nblock + 1 + else: + blockamount = len(data)//nblock + + # progress bar + print('blockamount: ' , blockamount) + progress = 0 + print(progress, '%' , flush = True, end = " ") + fish = ProgressFish(total = blockamount) + + # blockwise analysis + for idx in range(0, blockamount): + blockdata = data[idx*nblock:(idx+1)*nblock] + # progressbar + if progress < (idx*100 //blockamount): + progress = (idx*100)//blockamount + progressstr = ' Filestatus: ' + fish.animate(amount = idx, dexextra = progressstr) +#---analysis----------------------------------------------------------------------- + # step1: detect peaks in timeseries + pk, tr = detect_peaks(blockdata, thresh) + troughs = tr + # continue with analysis only if multiple peaks are detected + if len(pk) > 3: + peaks = dta.makeeventlist(pk,tr,blockdata,peakwidth) + + #dta.plot_events_on_data(peaks, blockdata) + peakindices, peakx, peakh = dta.discardnearbyevents(peaks[0],peaks[1],peakwidth) + peaks = peaks[:,peakindices] + + if len(peaks) > 0: + # used to connect the results of the current block with the previous + if idx > startblock: + peaklist = dta.connect_blocks(peaklist) + else: + peaklist = Peaklist([]) + aligned_snips = dta.cut_snippets(blockdata,peaks[0], 15, int_met = "cubic", int_fact = 10,max_offset = 1.5) + pcs = dta.pc(aligned_snips)#pc_refactor(aligned_snips) + order = 5 + minpeaks = 3 if deltat < 2 else 10 + labels = dta.cluster_events(pcs, peaks, order, 0.4, minpeaks, False, method = 'DBSCAN') + peaks = np.append(peaks,[labels], axis = 0) + #dta.plot_events_on_data(peaks, blockdata) + num = 1 + if idx > startblock: + dta.alignclusterlabels(labels, peaklist, peaks,data=blockdata) + peaks, peaklist = dta.ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude + minlen = 6 # >=1 + peaks = dta.discard_short_classes(peaks, minlen) + if len(peaks[0]) > 0: + peaks = dta.discard_wave_pulses(peaks, blockdata) + # plots the data part and its detected and classified peaks + if plot_steps == True: + dta.plot_events_on_data(peaks, blockdata) + pass + worldpeaks = np.copy(peaks) + # change peaks location in the buffered part to the location relative to the + peaklist.len = nblock + # peaklocations relative to whole recording + worldpeaks[0] = worldpeaks[0] + (idx*nblock) + thisblock_eods = np.delete(peaks,3,0) + # save the peaks of the current buffered part to a numpy-memmap on the disk + mmpname = "eods_"+filename[:-3]+"npmmp" + save_EOD_events_to_npmmp(thisblock_eods,eods_len,idx==startblock,datasavepath,mmpname) + eods_len += len(thisblock_eods[0]) + # after the last buffered part 
has finished, save the memory mapped + # numpy file of the detected and classified EODs to a .npy file to the + # disk + eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', shape=(4,eods_len), order = 'F') + if save == 1: + path = datasavepath+"/" + if not os.path.exists(path): + os.makedirs(path) + if eods_len > 0: + print('Saved!') + np.save(datasavepath+"/eods8_"+datasavepath+"npy", eods) + else: + #np.save(filename[:-4]+"/eods5_"+filename[:-3]+"npy", thisblock_eods) + + print('not saved') + else: # if there already has been a certain existing result file and 'new' was set to False + print('already analyzed') + print('returnes analyzed EODS. Calculate frequencies using all of these but discard the data from the EODS within the lowest few percent of amplitude') + return eods + +def main(): + # if len(sys.argv[:])<5: + # print('arguments missing. required: filepath to .WAV and whether to save results, plot each step and start a new analysis as 1 (True) or 0 (False) each') + # elif len(sys.argv[:])>5: + # starttime = int(sys.argv[5]) + # endtime = int(sys.argv[6]) + + # #eods = analyze_pulse_data(sys.argv[1],starttime=0,endtime =50,save = 0, new = 1, plot_steps=0) + # #eods = analyze_pulse_data(sys.argv[1],starttime=0,endtime =50,save = 0, npmmp= True, plot_steps=0) + # #eods = analyze_pulse_data(sys.argv[1],save = 0, npmmp= True, plot_steps=0) + print(sys.argv[1]) + eods = analyze_pulse_data(sys.argv[1],save = True, npmmp= True) + print(eods) +# +# colors = plt.get_cmap('jet')(np.linspace(0, 1.0, np.unique(eods[3])[-1]+1)) #len(np.unique(classlist)))) +# np.random.seed(22) +# np.random.shuffle(colors) +# colors = [colors[int(cl)] for cl in eods[3]] +# +# plt.scatter(eods[0], eods[1], color = colors) +# plt.show() +# + #analyze_long_pulse_data_file(*sys.argv[1:]) + +if __name__ == '__main__': + main() + diff --git a/thunderfish/DextersThunderfishAddition/README b/thunderfish/DextersThunderfishAddition/README new file mode 100644 index 00000000..6daee32c --- /dev/null +++ b/thunderfish/DextersThunderfishAddition/README @@ -0,0 +1,21 @@ +leticia_filedata.txt -- long files with filename, date and day/night and time - missing some files +allfiles.txt -- all long files, but without details + + + +### analyse_pulse_data with the .WAV file as input produces: +eods5_70914L01_F15_1742.npy -- numpy array with results from analyze_pulse_data + +### this eodsfile can be fed to analyzeEods_lowpass.py to analyze the frequencies and amplitudes and produce the plots over the length of the full recordings... + + +70914L01_F15_1742_freqs2_lp.npy -- computed frequencies, meaned over seconds/half seconds +70914L01_F15_1742_amps2_lp.npy -- computed amplitudes, averaged over seconds/... +-- Numpy 2d-arrays, first axis : different fish classes, second axis: timesteps (seconds/half_seconds) + +70914L01_F15_1742_AmpFreq7_lp.pdf -- pdf with plotted EOD-amplitudes and frequencies, no classes plotted, highest number is latest version, lp = lowpass smoothing + +foraging_ ... 
npz file with status far, near, on, each np.nan for not this status and 1 if it has the status at a given timestep, same time resolution as freqs / amps - files + +ontimes, other files - outdated + diff --git a/thunderfish/DextersThunderfishAddition/__init__.py b/thunderfish/DextersThunderfishAddition/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/thunderfish/DextersThunderfishAddition/allfiles.txt b/thunderfish/DextersThunderfishAddition/allfiles.txt new file mode 100644 index 00000000..81646503 --- /dev/null +++ b/thunderfish/DextersThunderfishAddition/allfiles.txt @@ -0,0 +1,155 @@ +60615L01F9.WAV +60116L01G9.WAV +60115L01F10.WAV +60615L09G7.WAV +70730L06_G10_1737.WAV +70729L01_G10_1743.WAV +70729L01_G10_557.WAV +70524L01_G10.WAV +71112L01_G10_608.WAV +60926L01F13.WAV +70917L01_G10_1738.WAV +71113L01_G10_1757.WAV +70916L01_G10_630.WAV +70730L06_F15_1735.WAV +71110L01_F12_1650.WAV +70730L01_G10_609.WAV +80118L01_F12_1703.WAV +40113L01F10.WAV +70728L01_F15_1034.WAV +70116L01E14.WAV +60722L02G14.WAV +70320L01_B6.WAV +61107L01F14.WAV +70730L01_F15_605.WAV +60722L02D6.WAV +60722L01D6.WAV +60926L01B3.WAV +70114L02D14.WAV +70522L07_D10.WAV +70917L01_D7_740.WAV +61106L01F14Noche.WAV +60721L01G14.WAV +51104L01Embarcadero.WAV +71113L01_G10_636.WAV +80121L01_F13_1731.WAV +70915L01_G10_735.WAV +80120L01_F13_640.WAV +80121L01_F13_608.WAV +71112L01_F12_1817.WAV +60725L23G12.WAV +71111L01_G10_1810.WAV +70916L02_Electrophorus embarcadero_1905.WAV +70729L01_F15_1740.WAV +51106L01Electrophorus.WAV +31105L01F14.WAV +60612L02rondoniF3.WAV +70915L01_F15_1805.WAV +70917L02_Electrophorus embarcadero_1849.WAV +70917L01_G10_724.WAV +61103L01F14.WAV +71111L10_F12_1807.WAV +60613L02rondoniF9.WAV +40320L01_G11.WAV +60925L01F12.WAV +60925L01F11.WAV +70728L01_G10_1048.WAV +61108L01Electrophorus.WAV +60924L01F12.WAV +70915L01_Electrophorus embarcadero_1242.WAV +60722L01G14.WAV +60926L01F12.WAV +80121L01_F11_606.WAV +70730L01_Solo Electrophorus embarcadero_735.WAV +71113L01_F12_559.WAV +70114L01D14.WAV +70524L02_D10.WAV +70522L01_G12.WAV +60614L22rondoniG9.WAV +70915L01_G10_1802.WAV +70320L01_B6.WAV +40112L01G9.WAV +70730L02_Solo Electrophorus embarcadero.WAV +60923L01.WAV +70914L01_F15_1742.WAV +70115L01D14.WAV +70727L01_E14_1641.WAV +30927L04E12.WAV +61106L01Electrophorus.WAV +70917L01_D7_1739.WAV +60723L02G14.WAV +30924L01C2.WAV +40116L01G9.WAV +61107L01Electrophorus.WAV +60723L01G14.WAV +71112L01_G10_1818.WAV +70915L02_Electrophorus embarcadero_1900.WAV +60725L01G14.WAV +70521L01_D4.WAV +70729L01_Solo Electrophorus embarcadero_733.WAV +70728L01_F15_1721.WAV +70521L01_F12.WAV +30104L01_Solo Electrophorus embarcadero_1732.WAV +60613L01rondoniF11.WAV +61106L01F14.WAV +70914L01_E13_1735.WAV +40114L01F10.WAV +60724L01B1.WAV +80119L01_Downstream fuera grilla_1425.WAV +71113L01_F12_1755.WAV +70522L48_G12.WAV +80119L01_G12_1747.WAV +70319L01_F1.WAV +70112L01E15.WAV +80121L01_F11_1730.WAV +80120L01_G10_1731.WAV +80120L01_G10_626.WAV +30925L06F11.WAV +61107L01F14Noche.WAV +70524L01_D10.WAV +70916L02_F15_1804.WAV +60925L06F12.WAV +60113L01F10.WAV +70915L01_F13_738.WAV +60721L01G13.WAV +51107L01Electrophorus.WAV +70523L01_G11.WAV +50926L08F13.WAV +70917L01_Electrophorus embarcadero_811.WAV +60114L01F10.WAV +70522L01_D10.WAV +70317L01_B1.WAV +70520L01_D13.WAV +70729L01_F15_553.WAV +70916L01_F15_648.WAV +40317L01_G11.WAV +60723L01D5.WAV +70317L01_G11.WAV +71111L01_F12_603.WAV +40319L01_F1.WAV +71112L01_F12_605.WAV +70916L01_Electrophorus embarcadero_830.WAV +60926L04F12.WAV +61104L01F14.WAV 
+70520L01_F10.WAV +60924L03C2.WAV +70319L01_G11.WAV +60927L08F13.WAV +60927L01F13.WAV +60724L02G14.WAV +70916L01_G10_1805.WAV +70729L02_Solo Electrophorus embarcadero_1841.WAV +70728L01_G10_1725.WAV +71111L01_G10_1339.WAV +70523L01_D10.WAV +70320L01_G11.WAV +80120L01_F13_1733.WAV +61106L02Electrophorus.WAV +60724L01G14.WAV +60723L02D5.WAV +51105L01Embarcadero.WAV +70113L01D14.WAV +40115L01F10.WAV +70727L01_G10_1643.WAV +61105L01F14.WAV +60724L02B1.WAV diff --git a/thunderfish/DextersThunderfishAddition/analyseDexRefactor.py b/thunderfish/DextersThunderfishAddition/analyseDexRefactor.py new file mode 100644 index 00000000..f9617e7c --- /dev/null +++ b/thunderfish/DextersThunderfishAddition/analyseDexRefactor.py @@ -0,0 +1,2276 @@ +# Script to detect and classify EODs in recordings of weakly electric pulse +# fish, Dexter Früh, 2018 +# # it is suggested to save the recording in +# workingdirectory/recording/recording.WAV + +# results will be saved in workingdirectory/recording/ +# +# input: +# - [Recorded Timeseries] recording.WAV +# outputs(optional): +# - [Detected and Classified EODs] +# (Numpy Array with Shape (Number of EODs, 4 (Attributes of EODs)), +# with the EOD-Attributes +# - x-location of the EOD +# (time/x-coordinate/datapoint in recording) +# - y-location of the EOD +# (Amplitude of the positive peak of the pulse-EOD) +# - height of the EOD(largest distance between peak and through in the EOD) +# - class of the EOD +# eods_recording.npy +# - [plots of the results of each analyse step for each +# analysepart (timeinterval of length = deltat) of the recording] +# +# required command line arguments at function call +# - save : if True, save the results to a numpy file (possibly +# overwrite existing) +# - plot : if True, plot results in each analysestep +# - new : if True, do a new analysis of the recording, even if there +# is an existing analyzed .npy file with the right name. 
+# +# call with: +# python3 scriptname.py save plot new (starttime endtime[sec] for only +# partial analysis) +# +# other parameters are behind imports and some hardcoded at the relevant +# codestep +import sys +import numpy as np +import copy +from scipy.stats import gmean +from scipy import stats +from scipy import signal +from scipy import optimize +import matplotlib +from fish import ProgressFish +import matplotlib.pyplot as plt +from thunderfish.dataloader import open_data +from thunderfish.peakdetection import detect_peaks +from scipy.interpolate import interp1d +from scipy.signal import savgol_filter +from collections import deque +import ntpath +import nixio as nix +import time +import os +from shutil import copy2 + +from ownDataStructures import Peak, Tr, Peaklist +import DextersThunderfishAddition as dta + +from IPython import embed +# parameters for the analysis + +deltat = 30.0 # seconds of buffer size +thresh = 0.04 # minimal threshold for peakdetection +peakwidth = 20 # width of a peak and minimal distance between two EODs + +# basic parameters for thunderfish.dataloader.open_data +verbose = 0 +channel = 0 + +# timeinterval to analyze other than the whole recording +#starttime = 0 +#endtime = 0 +#timegiven = False + +def main(): # analyse_dex.py filename save plot new (optional starttime endtime [sec]) + home = os.path.expanduser('~') + os.chdir(home) + # defaults for optional arguments + timegiven = False + plot_steps = False + + # parse command line arguments - filepath, save, plot, new (, starttime, + # endtime) + filepath = sys.argv[1] + #thresh = 0.05 + save = int(sys.argv[2]) + plot_steps = int(sys.argv[3]) + new = int(sys.argv[4]) + if len(sys.argv[:])>5: + timegiven = True + starttime = int(sys.argv[5]) + endtime = int(sys.argv[6]) + #print(starttime, endtime) + # plot_steps = 1 + peaks = np.array([]) + troughs = np.array([]) + cutsize = 20 + maxwidth = 50 #10 + ultimate_threshold = thresh+0.01 + filename = path_leaf(filepath) + + proceed = input('Currently operates in home directory. If given a pulsefish recording filename.WAV, then a folder filename/ will be created in the home directory and all relevant files will be stored there. continue? [y/n]').lower() + if proceed == 'n': + quit() + elif proceed == 'y': + pass + #do something + elif proceed != 'y': + quit() + + ### ## ask user before overwriting + # if save == 1: + # proceed = input('Really want to save data and possibly overwrite existing? 
[y/n]').lower() + # if proceed == 'n': + # quit() + # elif proceed == 'y': + # printcat file | while read line + # do + #do something + # done('continuing') + # elif proceed != 'y': + # quit() + datasavepath = filename[:-4] + print(datasavepath) + eods_len = 0 + + ### ## starting analysis if it is wished or the analyzed EODs-file is not available in the working directory + if new == 1 or not os.path.exists(filename[:-4]+"/eods5_"+filename[:-3]+"npy"): + + ### ## import data + with open_data(filepath, channel, deltat, 0.0, verbose) as data: + + if save == 1 or save == 0: + # datasavepath = filename[:-4]+"/"+filename + if not os.path.exists(datasavepath): + os.makedirs(datasavepath) + copy2(filepath, datasavepath) + samplerate = data.samplerate + + ### ## split datalength into smaller blocks + nblock = int(deltat*data.samplerate) + if timegiven == True: + #print(starttime, samplerate) + parttime1 = starttime*samplerate + # parttime1 = samplerate * 10270 + parttime2 = endtime*samplerate + data = data[parttime1:parttime2] + if len(data)%nblock != 0: + blockamount = len(data)//nblock + 1 + else: + blockamount = len(data)//nblock + bigblock = [] + + ### ## output first (0%) progress bar + print('blockamount: ' , blockamount) + progress = 0 + print(progress, '%' , end = " ", flush = True) + fish = ProgressFish(total = blockamount) + olddatalen = 0 + startblock = 0 + ## iterating through the blocks, detecting peaks in each block + for idx in range(startblock, blockamount): + + ### ## print progress + if progress < (idx*100 //blockamount): + #print(progress, '%' , end = " ", flush = True) + progress = (idx*100)//blockamount + # print('.' , end = '') + progressstr = 'Partstatus: '+ str(0) + ' '*2 + ' % (' + '0' + ' '*4+ '/' + '?'+' '*4+ '), Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + progressstr = 'Partstatus: '+ 'Part ' + '0'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + + ### ## take debugging times, not used right now + time1 = time.time() + #print('took ', time1-time0, 's') + time0 = time1 + + # time measurement of parts of the algorithm to find time + # efficiency bottlenecks + bottletime = [] + bottletime.append(time.time()) #0 + datx = data[idx*nblock:(idx+1)*nblock] + ### ## smoothing of the timeseries and calculating autocorrelation - not used + #from scipy.signal import butter, lfilter + #datx = savgol_filter(datx, 11, 7) + #fs = samplerate # 1 ns -> 1 GHz + #cutoff = samplerate/10 # 10 MHz + #B, A = butter(5, cutoff / (fs / 3), btype='low') # 1st order Butterworth low-pass + #datx = lfilter(B, A, datx, axis=0) + #plt.plot(datx) + #plt.show() + #sig = data[-320000:-1] + #autocorr = signal.fftconvolve(sig, sig, mode='full') + #plt.plot(autocorr) + #plt.show() + #f, Pxx_den = signal.periodogram(sig, samplerate) + #plt.plot(Pxx_den) + #plt.show() + #x = savgol_filter(x, 11, 7) + + # ---------- analysis -------------------------------------------------------------------------- + # step1: detect peaks in timeseries + pk, tr = detect_peaks(datx, thresh) + troughs = tr + bottletime.append(time.time()) #1 + # continue with analysis only if multiple peaks are detected + if len(pk) > 2: + def makepeaklist_refactor(pk,tr,data): + ### ## create 'peaks' with x,y and height and discard peaks that seem to be no EODs based on their width and simple features like - no minimum close to the maximum. 
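+            # note: the nested makepeaklist_refactor defined here mirrors dta.makeeventlist,
+            # which is the function that is actually called further below.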
+ # decide whether a peak or a through is detected first + pkfirst = int((min(pk[0],tr[0])= 0 and right_tr_ind < len(tr): + # ltr_x = tr[left_tr_ind] + # ltr_y = datx[ltr_x] + # rtr_x = tr[right_tr_ind] + # rtr_y = datx[rtr_x] + if min((pk_x - ltr_x),(rtr_x -pk_x)) > peakwidth: + pk_r[...] = False + elif max((pk_x - ltr_x),(rtr_x -pk_x)) <= peakwidth: + pk_h[...] = pk_y - min(ltr_y, rtr_y) + else: + if (pk_x-ltr_x)<(rtr_x-pk_x): + pk_h[...] = pk_y-ltr_y + else: + pk_h[...] = pk_y -rtr_y + elif left_tr_ind == -1: + if rtr_x-pk_x > peakwidth: + pk_r[...] = False + else: + pk_h[...] = pk_y- rtr_y + elif right_tr_ind == len(tr): + if pk_x-ltr_x > peakwidth: + pk_r[...] = False + else: + pk_h[...] = pk_y-ltr_y + peaks = np.array([peaks_x, peaks_y, peaks_h], dtype = np.float)[:,peaks_real!=0] + return peaks + peaks = dta.makeeventlist(pk,tr,datx,peakwidth) + #plt.plot(data[0:32000]) + #for ik in peaks.list[0:400]: + # plt.scatter(i.x, i.height) + #plt.show() + bottletime.append(time.time()) #2 + def discardnearbypeaks_refactor(peaks, peakwidth): + ### ## discard peaks that are close to each other, as a EOD mostly has more than one maximum and only one of the maxima is considered to be the EOD/EODlocation + unchanged = False + while unchanged == False: + x_diffs = np.diff(peaks[0]) + peaks_heights = peaks[2] + peaks_delete = np.zeros(len(peaks[0])) + for i, diff in enumerate(x_diffs): + if diff < peakwidth: + if peaks_heights[i+1] > peaks_heights[i] : + peaks_delete[i] = 1 + else: + peaks_delete[i+1] = 1 + peaks = peaks[:,peaks_delete!=1] + if np.count_nonzero(peaks_delete)==0: + unchanged = True + return peaks + peakindices, peakx, peakh = dta.discardnearbyevents(peaks[0],peaks[1],peakwidth) + peaks = peaks[:,peakindices] +# plt.plot(datx) +# plt.scatter(peaks[0],peaks[1]) +# plt.show() +# ### ## tries to calculate the noiselevel in the current recording part. Might actually not do anything at all, because the ultimate_threshold might be larger eitherway. some recordings have some exploitable data below this threshold, but most don't. And the rate of errors just gets too big for such small peaks. 
+# if len(peaks.list) > 2: +# tsh_n = calc_tsh_noise(peaks.list, datx) + bottletime.append(time.time()) #5 + # if len(peaks.list) > 2: + # noisediscard(peaks, ultimate_threshold, ultimate_threshold) + bottletime.append(time.time()) #6 + progressstr = 'Partstatus: '+ 'Part ' + '1'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + if len(peaks) > 0: + bottletime.append(time.time()) #7 + ### ## connects the current part with the one that came before, to allow for a continuous analysis + print('peaklist.len: ',peaklist.len) + if idx >= startblock+1: + peaklist = connect_blocks(peaklist) + else: + peaklist = Peaklist([]) + bottletime.append(time.time()) #8 + #print('\n ') + #print('cut_snips, with ' ,len(peaks.list), 'peaks') + # cuts snippets from the data time series around the peaks, interpolates them and aligns them + def cut_snippets_refactor(data, peaks, rnge): + snippets = [] + positions = np.array(peaks[0],dtype=np.int) + heights = peaks[2] + intfact = 10 + alignrange = 1.5 + alignwidth = int(np.ceil(alignrange * intfact) ) + for pos in positions: + snippets.append(data[(pos+rnge[0]):(pos+rnge[1])]) + scaled_snips = np.empty_like(snippets) + for i, snip in enumerate(snippets): + top = -rnge[0] + #plt.plot(snip) + scaled_snips[i] = snip * 1/heights[i] + #plt.plot(scaledsnips[i]) + #plt.show() + aligned_snips = np.empty((len(snippets), (rnge[1]-rnge[0])* + intfact-(2*alignwidth)-intfact)) + ipoled_snips = np.empty((len(snippets), (rnge[1]-rnge[0])*intfact-intfact)) + + for i, snip in enumerate(scaled_snips): + if len(snip) < ((rnge[1]-rnge[0])): + if i == 0: + snip = np.concatenate([np.zeros([((rnge[1]-rnge[0]) - len(snip))]),np.array(snip)]) + if i == len(scaledsnips): + snip = np.concatenate([snip, np.zeros([((rnge[1]-rnge[0])-len(snip))])]) + else: + snip = np.zeros([(rnge[1]-rnge[0])]) + interpolation = interpol(snip, 'cubic') #if len(snip) > 0 else np.zeros([(rnge[1]-rnge[0]-1)*intfact ]) + interpoled_snip = interpolation(np.arange(0, len(snip)-1, 1/intfact)) + intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) + if intsnipheight == 0: + intsnipheight = 1 + interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight + ipoled_snips[i] = interpoled_snip + + mean = np.mean(ipoled_snips, axis = 0) + meantop = np.argmax(mean) + #plt.plot(mean) + #plt.show() + #plt.plot(mean[10*-rnge[0]-10*5:-10*rnge[1]+21]) + #plt.show() + for i, interpoled_snip in enumerate(ipoled_snips): + cc = crosscorrelation(interpoled_snip[alignwidth:-alignwidth], mean) + #cc = crosscorrelation(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31], mean[10*-rnge[0]-10*7:-10*rnge[1]+31]) + offset = -15 + np.argmax(cc) + interpoled_snip = interpoled_snip[15-offset:-15-offset] if offset != -15 else interpoled_snip[30:] + #plt.plot(interpoled_snip) + if len(interpoled_snip[~np.isnan(interpoled_snip)])>0: + aligned_snips[i] = interpoled_snip + #plt.show() + return snippets, aligned_snips + snips, aligned_snips = dta.cut_snippets(datx,peaks[0], 15, int_met = "cubic", int_fact = 10,max_offset = 1.5) + # snips, scaledsnips = cut_snippets(datx, peaks.list, [-15,15]) + #wpf = wpfeats(scaledsnips) + #print(wpf[0]) + #print('pc') + progressstr = 'Partstatus: '+ 'Part ' + '2'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + #print('len ', len(scaledsnips)) + #print(scaledsnips) + def pc_refactor(cutsnippets): + # (observations, features) matrix + M = np.empty([len(cutsnippets), len(cutsnippets[0])]) + for i, snip in 
enumerate(cutsnippets): + M[i] = snip[:] + from sklearn.preprocessing import StandardScaler + from sklearn.decomposition import PCA + #StandardScaler().fit_transform(M) + pca = PCA() + pc_comp= pca.fit_transform(M) + return pc_comp + print(aligned_snips) + # calculates principal components + pcs = dta.pc(aligned_snips)#pc_refactor(aligned_snips) + #print('dbscan') + + # clusters the features(principal components) using dbscan algorithm. clusterclasses are saved into the peak-object as Peak.pccl + order = 5 + minpeaks = 3 if deltat < 2 else 10 + def dbscan_refactor(pcs, peaks, order, eps, min_samples, takekm, olddatalen): + # pcs (samples, features) + # X (samples, features) + from sklearn.cluster import DBSCAN + from sklearn import metrics + from mpl_toolkits.mplot3d import Axes3D + from sklearn.cluster import AgglomerativeClustering + try: + X = pcs[:,:order] + except: + X = pcs[:,order] + # ############################################################################# + # Compute DBSCAN + db = DBSCAN(eps, min_samples).fit(X) + from sklearn.cluster import KMeans + core_samples_mask = np.zeros_like(db.labels_, dtype=bool) + core_samples_mask[db.core_sample_indices_] = True + labels = db.labels_ ##### TODO ###### --- irgendwo Indexfehler oder so, last change - pcs richtige DImension + #peaks = np.array([np.append(peaks[:,i],labels[i]) for i in range(len(peaks[0]))]) + peaks = np.append(peaks,[labels], axis = 0) + return peaks + + peaks = dta.cluster_events(pcs, peaks, order, 0.4, minpeaks, False, olddatalen, method = 'DBSCAN') + #peaks = dbscan_refactor(pcs, peaks, order, 0.4, minpeaks, False, olddatalen) + + #plotPCclasses_ref(peaks, datx) + olddatalen = len(datx) + num = 1 + #classlist = np.vectorize(lambda peak: peak.pccl, otypes=[object])(peaks.list) + #snips, scaledsnips = cut_snippets(datx, peaks.list[classlist == num], [-15,5]) + #pcs2 = pc(scaledsnips, peaks.list[classlist==num]) + #pcs2 = wpfeats(scaledsnips) + #dbscan(pcs2, peaks.list[classlist == num],4, 0.15, 15, False) + #print('Classify') + progressstr = 'Partstatus: '+ 'Part ' + '3'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + + # classifies the peaks using the data from the clustered classes and a simple amplitude-walk which classifies peaks as different classes if their amplitude is too far from any other classes' last three peaks + peaks, peaklist = dta.ampwalkclassify3_refactor(peaks, peaklist, thresh) # classification by amplitude + # print(peaks.classlist) + print(peaks) + bottletime.append(time.time()) #9 + join_count=0 + # while True and joincc(peaklist, peaks) == True and join_count < 200: + # join_count += 1 + # continue + # print(peaks.classlist) + bottletime.append(time.time()) #10 + + # discards all classes that contain less than mincl EODs + mincl = 6 # >=1 + peaks = smallclassdiscard(peaks, mincl) + bottletime.append(time.time()) #11 + + # discards peaks, that are too wide compared to their + # inter spike intervals and seem to be wavesfish signals + # actually... 
works in some cases + if len(peaks[0]) > 0: + peaks = discardwaves_refactor(peaks, datx) + + # plots the data part and its detected and classified peaks + if plot_steps == True: + plotampwalkclasses_refactored(peaks, datx) + #pass + + # map the analyzed EODs of the buffer part to the whole + # recording + worldpeaks = np.copy(peaks) + bottletime.append(time.time()) #13 + # change peaks location in the buffered part to the location relative to the + idx = 1 + # peaklocations relative to whole recording + worldpeaks[0] = worldpeaks[0] + (idx*nblock) + peaklist.len = nblock +# for p in worldpeaks: +# = idx*nblock + p.x + bottletime.append(time.time()) #14 + bottletime.append(time.time()) #15 + # extract the relevant information from each peakobject of + # the buffered part and rearrange it as numpy array for + # computational efficienty + #x = xarray(thisblock) + #y = yarray(thisblock) + #h = heightarray(thisblock) + #cllist = clarray(thisblock) + #bottletime.append(time.time()) #16 + #thisblock_eods = np.array([x,y,h, cllist]) + #bottletime.append(time.time()) #17 + #bottletime.append(time.time()) #18 + #thisblockeods_len = len(thisblock_eods[0,:]) + thisblock_eods = np.delete(peaks,3,0) + thisblockeods_len = len(thisblock_eods[0]) + progressstr = 'Partstatus: '+ 'Part ' + '4'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + + # save the peaks of the current buffered part to a numpy-memmap on the disk + if thisblockeods_len> 0 and save == 1 or save == 0: + if idx == 0: + eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='w+', shape=(4,thisblockeods_len), order = 'F') + # fp = np.memmap(filepath[:len(filename)]+"eods_"+filename[:-3]+"npy", dtype='float32', mode='w+', shape=(4,len(thisblock_eods[0,:]))) + dtypesize = 8#4 #float32 is 32bit = >4< bytes long ---changed to float64 -> 8bit + eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', offset = dtypesize*eods_len*4, shape=(4,thisblockeods_len), order = 'F') + eods[:] = thisblock_eods + eods_len += thisblockeods_len + bottletime.append(time.time()) #19 + #classes.extend(np.unique(cllist)) + + # to clean the plt buffer... 
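+            # (closing the current figure here keeps figures opened while plot_steps is
+            # enabled from piling up over the many buffered parts of the recording)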
+ plt.close() + + # get and print the measured times of the algorithm parts for the + # current buffer + bottletime.append(time.time())#20 + time_a= bottletime[0] + for i, times in enumerate(bottletime): + #print('times: ' ,i, times-time_a) + time_a=times + + progressstr = 'Partstatus: '+ 'Part ' + '5'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + # plt.show() + + # after the last buffered part has finished, save the memory mapped + # numpy file of the detected and classified EODs to a .npy file to the + # disk + eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', shape=(4,eods_len), order = 'F') + print('before final saving: print unique eodcl: ' , np.unique(eods[3])) + if save == 1: + # #print('eods', eods[3]) + path = filename[:-4]+"/" + if not os.path.exists(path): + os.makedirs(path) + if eods_len > 0: + print('Saved!') + np.save(filename[:-4]+"/eods8_"+filename[:-3]+"npy", eods) + else: + #np.save(filename[:-4]+"/eods5_"+filename[:-3]+"npy", thisblock_eods) + print('not saved') + + else: # if there already has been a certain existing result file and 'new' was set to False + print('already analyzed') + + + # not used data implementation using NIX + # Save Data + + # Needed: + # Meta: Starttime, Startdate, Length + # x, y, h, cl, difftonextinclass -> freq ? , + + # Later: Find "Nofish" + # Find "Twofish" + # Find "BadData" + # Find "Freqpeak" + # ? Find "Amppeak" + # + + # bigblock = np.array(bigblock) + # x=xarray(bigblock) + # y=yarray(bigblock) + # cl=clarray(bigblock) + + + #nix file = nix.File.open(file_name, nix.FileMode.ReadWrite) + #nix b = file.blocks[0] + #nix nixdata = b.data_arrays[0] + #nix cldata = [] + #nix #print(classes) + #nix #print(b.data_arrays) + #nix for i in range(len(np.unique(classes))): + #nix cldata.append(b.data_arrays[i+1]) + + + # for cl in + + # for cl in + # x = thisfish_eods + + + #nix file.close() + +def path_leaf(path): + ntpath.basename("a/b/c") + head, tail = ntpath.split(path) + return tail or ntpath.basename(head) + +def fill_hidden(fishclasses): + + fishes = fishclasses + + nohidefishes = {} + for cl in fishes: + x =[] + y = [] + h = [] + fish = fishes[cl] + # #print('fish', fish) + fishisi = calcisi(fish) + isi = fishisi[0] + for i, newisi in enumerate(fishisi): + leftpeak = fish[i] + x.append(leftpeak.x) + y.append(leftpeak.y) + h.append(leftpeak.height) + if newisi > 2.8*isi: + guessx = leftpeak.x + isi + + while guessx < leftpeak.x + newisi-0.8*isi: + + peakx = peakaround(guessx, isi*0.1, fishes) + if peakx is not None: + x.append(peakx) + y.append(leftpeak.y) + h.append(leftpeak.height) + guessx = peakx+ isi + (peakx-guessx) + + continue + break + isi = newisi + nohidefishes[cl]= {'x':x,'y':y,'h':h} + return nohidefishes + +def plotheights(peaklist): + heights = heightarray(peaklist) + x_locations = xarray(peaklist) + plt.scatter(x_locations, heights) + plt.show() + +def ploteods(eods, data): + plt.plot(range(len(data)),data, color = 'black') + classlist = eods[3] + cmap = plt.get_cmap('jet') + colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) + np.random.seed(22) + np.random.shuffle(colors) + colors = [colors[cl] for cl in np.unique(classlist)] + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) + x=0 + if len(classlist)>0: + # #print(classlist) + # #print('classes: ' , np.unique(classlist)) + from collections import Counter + count = Counter(classlist) + # #print('longest class: ', count.most_common()[0]) + for num, color in 
zip(np.unique(classlist), colors): + peaksofclass = eods[:,:][:, classlist == num] + #xpred = linreg_pattern(peaksofclass[0:3]) + #for p in peaksofclass[0:3]: + # #print(p.x) + ##print(xpred, peaksofclass[3].x) + + #if len(peaksofclass) > 1000: + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) + #else: + plt.plot(peaksofclass[0], peaksofclass[1], '.', color = color, ms =20) + plt.show() + +def fill_hidden_3(fishes): + + fishes = fishes + + nohidefishes = {} + for cl, fish in fishes.items(): + x =[] + y = [] + h = [] + # fish = fishes[cl] passt net, fishes is np.array mit (cl, (xyh)) + fishisi = np.diff(fish[0]) + isi = fishisi[0] + for i, newisi in enumerate(fishisi): + leftpeak = i + x.append(fish[0][i]) + y.append(fish[1][i]) + h.append(fish[2][i]) + # #print(cl, fish[0][i], isi, newisi) + if newisi > 2.8*isi: + guessx = fish[0][i] + isi + + while guessx < fish[0][i] + newisi-0.8*isi: + + peakx = peakaround3(guessx, isi*0.1, fishes) + if peakx is not None: + # #print(jup) + x.append(peakx) + y.append(fish[1][i]) + h.append(fish[2][i]) + guessx = peakx+ isi + (peakx-guessx) + + continue + break + isi = newisi + nohidefishes[cl]= {'x':x,'y':y,'h':h} + + return nohidefishes + +def peakaround2(guessx, interval, fishes): + found = False + for cl, fish in fishes.items(): + for px in fish['x']: + distold = interval + if px < guessx-interval: + continue + # #print('in area', guessx-interval) + if guessx-interval < px < guessx+interval: + found = True + dist = px-guessx + if abs(dist) < abs(distold): + distold = dist + if px > guessx+interval: + if found == True: + # #print(guessx, dist) + return guessx + dist + else: break + return None + +def peakaround3(guessx, interval, fishes): + found = False + for cl, fish in fishes.items(): + for px in fish[0]: + distold = interval + if px < guessx-interval: + continue + # #print('in area', guessx-interval) + if guessx-interval < px < guessx+interval: + found = True + dist = px-guessx + if abs(dist) < abs(distold): + distold = dist + if px > guessx+interval: + if found == True: + # #print(guessx, dist) + return guessx + dist + else: break + return None + +def peakaround(guessx, interval, fishes): + found = False + for cl, fish in fishes.items(): + for peak in fish: + + distold = interval + if peak.x < guessx-interval: + continue + # #print('in area') + if guessx-interval < peak.x < guessx+interval: + found = True + dist = peak.x-guessx + if abs(dist) < abs(distold): + distold = dist + if peak.x > guessx+interval: + if found == True: + # #print(guessx, dist) + return guessx + dist + else: break + return None + +def fill_holes(fishes): #returns peakx, peaky, peakheight # Fills holes that seem to be missed peaks in peakarray with fake (X/Y/height)-Peaks + retur = {} + lost = {} + for cl, fish in fishes.items(): + fishisi = np.diff(fish['x']) + mark = np.zeros_like(fishisi) + isi = 0 + ##print('mark', mark) + # #print('fishisi' , fishisi) + #find zigzag: + c=0 + c0= 0 + n=0 + for i, newisi in enumerate(fishisi): + if abs(newisi - isi)>0.15*isi: + if (newisi > isi) != (fishisi[i-1] > isi): + c+=1 + # #print(abs(newisi - isi), 'x = ', fish[i].x) + c0+=1 + elif c > 0: + n += 1 + if n == 6: + if c > 6: + # print ('zigzag x = ', fish['x'][i-6-c0], fish['x'][i-6]) + mark[i-6-c0:i-6]= -5 + c = 0 + c0=0 + n = 0 + + #if c > 0: + # #print(i, c) + # if c == 6: + # #print('zigzag!') + isi = newisi + isi = 0 + for i, newisi in enumerate(fishisi): + ##print('mark: ' , mark) + if mark[i] == -5: continue + if i+2 >= len(fishisi): + continue 
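+            # marker semantics used by the conditions below (as far as they indicate):
+            #   mark ==  1 : gap of roughly 2*isi -> one peak assumed missing, one synthetic peak is inserted later
+            #   mark ==  2 : gap of roughly 3*isi -> two peaks assumed missing, two synthetic peaks are inserted later
+            #   mark == -1 : interval much shorter than isi -> peak treated as spurious and dropped
+            #   mark == -5 : zigzag region flagged above -> peaks are kept but also reported in 'lost'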
+ if (2.2*isi > newisi > 1.8*isi) and (1.5*isi>fishisi[i+1] > 0.5*isi) : + mark[i] = 1 + isi = newisi + # #print('found 1!' , i) + elif (2.2*isi > newisi > 1.8*isi) and (2.2*isi> fishisi[i+1] > 1.8*isi) and (1.5*isi > fishisi[i+2] > 0.5*isi): + mark[i] = 1 + isi = isi + elif 3.4*isi > newisi > 2.6*isi and 1.5*isi > fishisi[i+1] > 0.5*isi: + mark[i] = 2 + + elif (0.6* isi > newisi > 0): + # #print('-1 found', i ) + if mark[i] ==0 and mark[i+1] ==0 and mark[i-1]==0 : + # isi = newisi + # continue + # #print('was not already set') + if fishisi[i-2] > isi < fishisi[i+1]: + mark[i] = -1 + # #print('-1') + elif isi > fishisi[i+1] < fishisi[i+2]: + mark[i+1] = -1 + # #print('-1') + isi = newisi + filldpeaks = [] + x = [] + y = [] + h = [] + x_lost=[] + y_lost=[] + h_lost=[] + # #print('filledmarks: ', mark) + for i, m in enumerate(mark): + if m == -1 : + # #print('-1 at x = ', fish['x'][i]) + continue + if m == -5: + x_lost.append(fish['x'][i]) + y_lost.append(fish['y'][i]) + h_lost.append(fish['h'][i]) + x.append(fish['x'][i]) + y.append(fish['y'][i]) + h.append(fish['h'][i]) + continue + x.append(fish['x'][i]) + y.append(fish['y'][i]) + h.append(fish['h'][i]) + if m == 1: + # #print('hofly added peak at x = ' , fish['x'][i]) + x.append(fish['x'][i] + fishisi[i-1]) + y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) + h.append(0.5*(fish['h'][i]+fish['h'][i+1])) + elif m== 2: + x.append(fish['x'][i] + fishisi[i]) + y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) + h.append(0.5*(fish['h'][i]+fish['h'][i+2])) + x.append(fish['x'][i] + 2*fishisi[i-1]) + y.append( 0.5*(fish['y'][i]+fish['y'][i+2])) + h.append(0.5*(fish['h'][i]+fish['h'][i+2])) + # #print('added at x = ', fish['x'][i] + fishisi[i]) + retur[cl] = {'x':x,'y':y,'h':h} + lost[cl] = {'xlost':x_lost,'ylost':y_lost,'hlost':h_lost} + # filledpeaks =np.array(filledpeaks) + # #print(filledpeaks.shape) + # filledpeaks. + return retur, lost + +def calc_tsh_noise(peaks, data): + heights = np.vectorize(lambda peak: peak.height)(peaks) + # peakx = xarray(peaks) + # peakxlist = peakx.tolist() + # #print('datenstdanfang: ', np.std(data)) + # datatsh = np.mean(np.abs(data))# + # datatsh = 2* np.std(data) + # peakareas = [i for x in peakx for i in range(x-10, x+10) if (i < len(data))] + # peakareas = np.arange(peakx-10, peakx+10, 1) + # relevantdata = [] + #peakareas = np.unique(peakareas) + # #print(len(peakareas), len(data), ' len peakarea and data' , datatsh) + #relevantdata is the data without the areas around the peaks, to calculate the standard deviation of the noise + #c = 0 + tsh = 0.1*np.std(heights) + + #for i, dat in enumerate(data): + # if peakareas[c] == i and c dist: + # dist = tdist + #print('dist', dist) + if dist>=0: + valid = True + if olddatalen > 0: + alignlabels(labels, peaks, olddatalen) + for i, p in enumerate(peaklist): + pcclasses[peaknum] = labels[i] + return valid + if takekm: + km = KMeans(n_clusters=3, n_init = 3, init = 'random', tol=1e-5, random_state=170, verbose = True).fit(X) + core_samples_mask = np.zeros_like(km.labels_, dtype=bool) + labels = km.labels_ + if takekm: + for i, p in enumerate(peaklist): + # print('label ', labels[i]) + pcclasses[peaknum] = p.pccl + # Number of clusters in labels, ignoring noise if present. + n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) + #print('Estimated number of clusters: %d' % n_clusters_) + # ############################################################################# + # Plot result + # Black removed and is used for noise instead. 
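+    # the 3D scatter built below shows the first three feature dimensions of X (the leading
+    # principal components), colored by cluster label; label -1, used by DBSCAN for noise,
+    # is drawn in black (plt.show() is commented out, so the figure is built but not shown)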
+ unique_labels = set(labels) + colors = [plt.cm.Spectral(each) + for each in np.linspace(0, 1, len(unique_labels))] + fig = plt.figure() + ax = fig.add_subplot(111, projection = '3d') + for k, col in zip(unique_labels, colors): + if k == -1: + # Black used for noise. + col = [0, 0, 0, 1] + class_member_mask = (labels == k) + xy = X[class_member_mask] + # print(col) + ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), + markeredgecolor='k', markersize=14) + ax.set_title('Estimated number of clusters: %d' % n_clusters_) + #plt.show() + + + from sklearn.neighbors import kneighbors_graph + knn_graph = kneighbors_graph(X, 15, include_self=False) + ac = AgglomerativeClustering(linkage = 'complete', n_clusters = 3, connectivity = knn_graph).fit(X) + core_samples_mask = np.zeros_like(ac.labels_, dtype=bool) + labels = ac.labels_ + if takekm: + for i, p in enumerate(peaklist): + print('label ', labels[i]) + pcclasses[peaknum] = labels[i] + # Number of clusters in labels, ignoring noise if present. + n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) + #print('Estimated number of clusters: %d' % n_clusters_) + # ############################################################################# + # Plot result + # Black removed and is used for noise instead. + unique_labels = set(labels) + colors = [plt.cm.Spectral(each) + for each in np.linspace(0, 1, len(unique_labels))] + fig = plt.figure() + ax = fig.add_subplot(111, projection = '3d') + for k, col in zip(unique_labels, colors): + if k == -1: + # Black used for noise. + col = [0, 0, 0, 1] + class_member_mask = (labels == k) + xy = X[class_member_mask] + print(col) + ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), + markeredgecolor='k', markersize=14) + ax.set_title('Estimated number of clusters: %d' % n_clusters_) + #plt.show() + +def ampwalkclassify3_refactor(peaks,peaklist): # final classificator + classamount = peaklist.classamount + # for i in range(start, len(peaks)-start): + lastofclass = peaklist.lastofclass # dict of a lists of the last few heightvalues of a class, f.E ((1,[0.7,0.68,0.71]), (5, [0.2, 0.21, 0.21])) + lastofclassx = peaklist.lastofclassx # dict of a list of the last few x-values of a class + a=0 + elem = 0 + thresholder = [] + comperr = 1 + classesnearby = peaklist.classesnearby # list of the classes of the last n peaks (currently 12) f.E:[1,2,1,2,1,3,2,1,...] + classesnearbyx = peaklist.classesnearbyx # list of the x-values of the last n peaks, f.E:[13300, 13460, 13587, 13690, 13701, ...] 
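+    # amplitude-walk in short: every new peak is compared on a log2 scale against the running
+    # mean height of each class seen nearby; it joins the best-fitting class if
+    #   abs(np.log2(height) - np.log2(classmean)) < np.log2(factor)
+    # and otherwise a new class is opened (see the loop over peaks.T below)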
+ classesnearbypccl = peaklist.classesnearbypccl # list of the pc-classified classes of the last n peaks + classes = np.zeros((len(peaks[0]))) + pcclasses = peaks[3] + positions = peaks[0] + heights = peaks[1] + + # #print('nearbyclasses at start:' ,classesnearby, classesnearbyx) + # for peak in peaks: + # peak.cl = peak.pccl+2 + # peaklist.classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) + # return peaks + cl = 0 + maxdistance = 30000 # Max distance to possibly belong to the same class + factor = 1.6 # factor by which a peak fits into a class, f.E: classheight = 1 , factor = 2 => peaks accepted in range (0.5,2) + c=0 + peakamount = len(peaks.T) + #fish = ProgressFish(total = peakamount) + for peaknum, p in enumerate(peaks.T): + perc = str((peaknum*100)//peakamount) + # fish.animate(amount = "", dexextra = 'Partstatus: '+ ' '*(3-len(perc)) +perc + ' % (' + ' '*(4-len(str(peaknum)))+str(peaknum) + '/' + ' ' *(4-len(str(peakamount)))+str(peakamount) + '), Filestatus:') + awc_btime = [] + if len(lastofclass) == 0: # Dict with all classes, containing the heights of the last few peaks + lastofclass[1] = deque() + lastofclassx[1]= deque() + lastofclass[1].append(heights[peaknum]) + lastofclassx[1].append(positions[peaknum]) + classesnearby.append(1) + classesnearbyx.append(-1) + classesnearbypccl.append(pcclasses[peaknum]) + classes[peaknum] = 1 + classamount += 1 + continue + time1 = time.time() + for i, cl in enumerate(classesnearby): + if (positions[peaknum]-classesnearbyx[i]) > maxdistance: + classesnearby.pop(i) + classesnearbyx.pop(i) + classesnearbypccl.pop(i) + lastofclassisis = [] + for i in classesnearby: + # print(i, classesnearby) + lastofclassisis.append(np.median(np.diff(lastofclassx[i]))) + meanisi = np.mean(lastofclassisis) + if 32000 > 20*meanisi> 6000: + maxdistance = 20*meanisi + #print(meanisi, maxdistance , 'maxdistance ----------------------------------------------------------------------------------------------') + + time2 = time.time() + awc_btime.append(time2-time1) #0 + cl = 0 # 'No class' + comperr = 1 + ##print('classesnearby at a peak', classesnearby) + clnrby = np.unique(classesnearby) + time1 = time.time() +# classmean = 0 + # if pcclasses[peaknum] == -1: + # factor = 1.2 + # else: + # factor = 1.6 + + for i in clnrby: + #print('cl: ', i) + # if classesnearbypccl[classesnearby.index(i)] == -1: + # factor = 2.2 + # else: factor = 1.6 + classmean = np.mean(lastofclass[i]) + logerror = np.abs(np.log2(heights[peaknum])-np.log2(classmean)) + abserror = np.abs(heights[peaknum]-classmean) + logthresh = np.log2(factor) + #ä#print(np.std(lastofclass[i])) absthresh = 0.5*classmean # #print('test log', np.abs(np.log2(np.array([0.4,0.5,1,1.5,2,2.4]))-np.log2(np.array([1,1,1,1,1,1]))) ) # abs(classmean*0.5) + #relerror = error + relerror = logerror + relabserror = abserror/thresh + # if 1140 < p.num < 1150: + # print(p.num) + # print('for classes at one peak: classmean, height, abserror, thresh', + # classmean,heights[peaknum], logerror, logthresh) + #print(len(classesnearbypccl), len(classesnearby)) + #print(classmean, heights[peaknum], logerror, logthresh, pcclasses[peaknum], classesnearbypccl[classesnearby.index(i)]) + if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum] or pcclasses[peaknum] == -1:# or + if logerror < logthresh: ## SameClass-Condition + if relerror < comperr and (positions[peaknum]-classesnearbyx[classesnearby.index(i)]) 2*compareisierror: +# cl = holdlastcl + + time2 = time.time() + awc_btime.append(time2-time1) 
#1 + time1 = time.time() + if pcclasses[peaknum] != -1: + if cl != 0 : + #print(cl) + if len(lastofclass[cl]) >= 3: + lastofclass[cl].popleft() + if len(lastofclassx[cl]) >= 3: + lastofclassx[cl].popleft() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + else: # Add new class + cl = classamount+1 + #print('existingclasses: ', classamount) + classamount = cl + + #print('newclass: ----------------------------------------------------------------', cl) + lastofclass[cl] = deque() + lastofclassx[cl] = deque() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + ##print('tatsaechlich: ', cl) + if len(classesnearby) >= 12: #kacke implementiert? + minind = classesnearbyx.index(min(classesnearbyx)) + del lastofclass[classesnearby[minind]] + del lastofclassx[classesnearby[minind]] + #print(classesnearby[minind], 'del') + classesnearby.pop(minind) + classesnearbyx.pop(minind) + classesnearbypccl.pop(minind) + # for ind, clnrby in enumerate(reversed(classesnearby)): + # classesnearbyx + # del lastofclass[classesnearby[ind]] + # # del lastofclassx[classesnearby[minind]] + # classesnearby.pop(minind) + # classesnearbyx.pop(minind) + try: + ind=classesnearby.index(cl) + classesnearbyx[ind] = positions[peaknum] + # #print(ind ,' --------------------------------------here -----------------------------') + except ValueError: + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + else: + if cl != 0: + classes[peaknum] = cl + else: + cl = classamount+1 + #print('existingclasses: ', classamount) + classamount = cl + #print('newclass: ', cl) + lastofclass[cl] = deque() + lastofclassx[cl] = deque() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + if len(classesnearby) >= 12: #kacke implementiert? 
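+                # only about 12 recently seen classes are tracked at a time; the class whose
+                # last peak lies furthest back is forgotten here to make room for new ones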
+ minind = classesnearbyx.index(min(classesnearbyx)) + del lastofclass[classesnearby[minind]] + del lastofclassx[classesnearby[minind]] + #print(classesnearby[minind], 'del') + classesnearby.pop(minind) + classesnearbyx.pop(minind) + classesnearbypccl.pop(minind) + # for ind, clnrby in enumerate(reversed(classesnearby)): + # classesnearbyx + # del lastofclass[classesnearby[ind]] + # # del lastofclassx[classesnearby[minind]] + # classesnearby.pop(minind) + # classesnearbyx.pop(minind) + try: + ind=classesnearby.index(cl) + classesnearbyx[ind] = positions[peaknum] + # #print(ind ,' --------------------------------------here -----------------------------') + except ValueError: + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + # #print('classesnearby after a peak', classesnearby) + # for clnum, cls in enumerate(classesnearby): ## deleting almost identical classes (< % difference in amplitude) + # if cls == False: + # continue + # if True: + # continue + # compare = np.mean(lastofclass[cls]) + # for i in classesnearby[clnum:-1]: + # if i== False: + # continue + # if i != cls and abs(compare - np.mean(lastofclass[i])) < compare*0.01: ## + # # #print(compare) + # # #print( np.mean(np.vectorize(lambda peak: peak.height)(lastofclass[i]))) + # clindex = classesnearby.index(cls) + # classesnearby[clindex] = False + # classesnearbyx[clindex] = False + # del lastofclass[cls] + # del lastofclassx[cls] + # # cl = holdlastcl + # # if cl == cls: + # + # + # #print('combinedsomeclasses that were similar', cl, cls) + time2 = time.time() + # awc_btime.append(time2-time1) #2 + # classesnearby = [cls for cls in classesnearby if cls != False] + # classesnearbyx = [clx for clx in classesnearbyx if clx != False] + # + # + #print('awc_btime ', awc_btime , ' newpeak-------------------------------------------------------- :') + peaklist.lastofclass = lastofclass + peaklist.lastofclassx = lastofclassx + peaklist.classesnearby = classesnearby + peaklist.classesnearbyx = classesnearbyx + peaklist.classlist = classes # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) + peaklist.classamount = classamount + peaks = np.append(peaks,classes[None,:], axis = 0) + return peaks, peaklist + +def joincc(peaklist,peaks): + # connects classes that appear after each other... 
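+    # rough idea: when one class ends and another class starts shortly afterwards with a
+    # similar mean amplitude (compared over up to the last/first five peaks of each), the two
+    # are assumed to belong to the same fish and are merged via the 'connect' mapping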
+ # peaklist = peaks.list + joinedsome = False + classlist = peaks[4] + peaksofclass = {} + last = [] + connect = {} #connect classes in connect+ + classcount = dict.fromkeys(classlist, 0) + ##print(classcount) + #classcount = [0]*len(np.unique(classlist)) + # #print(np.unique(classlist)) + for cl in np.unique(classlist): + peaksofclass[cl]= peaks[:,classlist == cl] + for i in range(len(peaks[0])): # i is the increasing index of the peaks + p = peaks[:,i] + poc = peaksofclass[p[4]] + classcount[p[4]]+=1 + countclass = p[4] #the current class before it might be changed to the connected class + if p[4] in connect: + p[4] = connect[p[4]] #peakclass is changed to connected class + # #print('changed ', countclass, 'to', p.cl) + joinedsome = True + + if len(poc) == classcount[countclass]: #the current peak is last peak of its class + last = poc[-len(poc) if len(poc) <= 5 else 5:] #the last peaks of the class + # #print('last: ', last) + #mean_last = np.mean(np.vectorize(lambda peak: peak[2])(last)) + mean_last = np.mean(last[2,:]) + nextfirst = {} # the first peaks of the next coming class(es) + # #print('class: ', countclass, 'at x = ', p.x, 'mean_last: ', mean_last) + for nexti in range(20): # the next 10 peaks are considered if they belong to the same classe + if i + nexti >= len(peaks[0]): break + inextp = peaks[:,i+nexti] + if classcount[inextp[4]] == 0: #current peak is first peak of its class + # #print('found a new begin! its class:' , inextp.cl) + ponc = peaksofclass[inextp[4]] # + nextfirst[inextp[4]] = ponc[0:len(ponc) if len(ponc) <= 5 else 5] + # #print(np.mean(np.vectorize(lambda peak: peak.height)(nextfirst[inextp.cl]))) + # #print(nextfirst) + compare = 1 + c = 0 + nextclass = -1 + for nextcl, first in nextfirst.items(): + mean_nextfirst = np.mean(first[2,:])#np.mean(np.vectorize(lambda peak: peak.height)(first)) + # #print(mean_nextfirst) + error = abs(mean_nextfirst - mean_last)/(mean_nextfirst) + if error < 1: + if compare < error: + continue + compare = error + if nextcl in connect: #if the peak that ist considered belongs to a class, that is already supposed to be connected to the current class + pocc = peaksofclass[connect[nextcl]] #peaks of the currently supposed connected class + if ( abs(mean_nextfirst - np.mean(pocc[-len(pocc) if -len(pocc) <= 5 else 5:][2])) + < abs(mean_nextfirst - mean_last) ): + continue + nextclass = nextcl + if nextclass != -1: + connect[nextclass] = p[4] + # #print('connect ', p.cl , ' and ', nextcl) + for cl in peaklist.classesnearby: + if cl in connect: + # #print('cl, connect', cl, connect[cl]) + peaklist.classesnearby[peaklist.classesnearby.index(cl)] = connect[cl] + peaklist.lastofclass[connect[cl]]=peaklist.lastofclass[cl] + peaklist.lastofclassx[connect[cl]]= peaklist.lastofclassx[cl] + peaklist.classlist = peaks[4] + return joinedsome + # for poc in peaksofclass: + # if len(poc) >= 3: + # newlast = poc[-3:] + # first = poc[:3] + # else: + # newlast = poc[-len(poc):] + # first = poc[:len(poc)] + # if last != []: + # if abs(np.mean(first) - np.mean(last)) < 0: + # #print('oh') + +def discardwaves_refactor(peaks, data): + + deleteclasses = [] + for cl in np.unique(peaks[3]): + peaksofclass = peaks[:,peaks[3] == cl] + isi = np.diff(peaksofclass[0]) + isi_mean = np.mean(isi) + # #print('isismean',isi_mean) + widepeaks = 0 + # #print('width',peaksofclass[2].width) + isi_tenth_area = lambda x, isi:np.arange(np.floor(x-0.1*isi),np.ceil(x+0.1*isi),1, dtype = np.int) + for p in peaksofclass.T: + data = np.array(data) + try: + for dp_around in 
data[isi_tenth_area(p[0],isi_mean)]:#np.floor(p[0]-0.1*isi_mean), np.ceil(p[0]+0.1*isi_mean),1)]:# + if dp_around <= p[1]-p[2]: + break + except IndexError: + pass + else: + widepeaks+=1 + ## p.isreal_pleateaupeaks() + if widepeaks > len(peaksofclass)*0.5: + deleteclasses.append(cl) + for cl in deleteclasses: + peaks = peaks[:,peaks[3]!=cl] + return peaks + +def smallclassdiscard(peaks, mincl): + classlist = peaks[3] + smallclasses = [cl for cl in np.unique(classlist) if len(classlist[classlist + == cl]) < + mincl] + delete = np.zeros(len(classlist)) + for cl in smallclasses: + delete[classlist == cl] == 1 + peaks = peaks[:,delete != 1] + return peaks + +def makepeak(data_x,cutsize, maxwidth, peakx, ltr, data_ltr, rtr, data_rtr, num, minhlr): + #if len(data) > peakx + cutsize/2: + return Peak(peakx, data_x, maketr(data_ltr, ltr), maketr(data_rtr, rtr), maxwidth, num, minhlr)#data[peakx-cutsize/2:peakx+cutsize/2], num) + #else: + # return Peak(peakx, data[peakx], + # maketr(data, ltr), + # maketr(data, rtr), + # maxwidth, + # #data[peakx-cutsize/2:-1], + # num) + +def maketr(data_x, x): + if x is not None: + return Tr(x,data_x) + else: + return None + +def makepeaklist(pkfirst, data, pk, tr, cutsize, maxwidth): + peaklist = np.empty([len(pk)], dtype = Peak) + trtopk = pkfirst + pktotr = 1-pkfirst + trlen = len(tr) + pklen = len(pk) + minhlr = lambda i, mwl, mwr : min( + abs( data[pk[i]] - min( data[pk[i]-mwl:pk[i]] ) if len(data[pk[i]-mwl:pk[i]]) > 0 else 0 ) + , + abs( data[pk[i]]- min( + data[pk[i]:pk[i]+mwr] ) if len(data[pk[i]:pk[i]+mwr]) > 0 else 0 ) + ) + #print(min( data[pk[0]-0:pk[2]]) ) + + if pktotr == 0: + peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], None, None, tr[pktotr], data[pktotr], 0, minhlr(0, 0, maxwidth)) + else: + peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], + tr[-trtopk], + data[-trtopk], tr[pktotr], data[pktotr], + 0, minhlr(0, min(maxwidth, + pk[0]-tr[-trtopk]) , maxwidth)) + for i in range(1,pklen-1): + peaklist[i] = makepeak(data[pk[i]], cutsize, maxwidth, pk[i], tr[i-trtopk], data[tr[i-trtopk]], tr[i+pktotr],data[tr[i+pktotr]], i, minhlr(i, maxwidth, maxwidth)) + if pktotr == 0 and pklen <= trlen: + peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1], data[pklen-trtopk-1], tr[pklen+pktotr-1], data[pklen+pktotr-1], i, minhlr(pklen-1, maxwidth, min(maxwidth, tr[pklen+pktotr-1]-pk[pklen-1]))) + else: + peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1],data[pklen-trtopk-1], None, None, pklen-1, minhlr(pklen-1, maxwidth, 0)) + return peaklist + +#def doublepeaks(peaks, peakwidth): +# dif2 = peaks[1].x-peaks[0].x +# if dif2 > 5* peakwidth: +# peaks[0].real = False +# for i in range(1,len(peaks)-1): +# dif1 = dif2 +# dif2 = peaks[i+1].x-peaks[i].x +# if dif1 > 5* peakwidth and dif2 > 5* peakwidth: +# peaks[i].real = False +# if dif2 > 5* peakwidth: +# peaks[len(peaks)-1] = False +# return peaks + +def discardunrealpeaks(peaklist): + peaks = peaklist[:][np.vectorize(lambda peak: peak.real, otypes=[object])(peaklist) == True] + for i, p in enumerate(peaks): + pass + # p.num = i + return peaks + +def discardnearbypeaks(peaks, peakwidth): + peaksx = xarray(peaks) + pkdiff = np.diff(peaksx) + # peakwidth = avg_peakwidth(pknum,tr) + pknumdel= np.empty(len(peaksx)) + pknumdel.fill(False) +# peaksy = yarray(peaks) + peaksh = heightarray(peaks) + for i,diff in enumerate(pkdiff): + # #print(peaks[i].height) + if diff < peakwidth: #* peaks[i].height: ### Trial 
Error + if peaksh[i+1] > 1.01 *peaksh[i] : + pknumdel[i] = True + else: + # print(peaksh[i],peaksh[i+1]) + pknumdel[i+1] = True + peaks = peaks[pknumdel!=True] + for i, p in enumerate(peaks): + p.num = i + return peaks + +def interpol(data, kind): + #kind = 'linear' , 'cubic' + width = len(data) + x = np.linspace(0, width-1, num = width, endpoint = True) + return interp1d(x, data[0:width], kind , assume_sorted=True) + +def cutcenter(peak): + p = peak + cut = p.cut + pl=p.distancetoltr + pr=p.distancetortr + if pl is None: + pl = 10 + tx = p.x-10 + else: tx = p.ltr.x + if pr is None: + pr = 10 + if pl < p.maxwidth and pr > 1: + + width=len(cut) + # #print('distancetoltr',pl) + peakshape = cut + interpolfreq = 1 + xnew = np.linspace(0,len(peakshape)-1, len(peakshape)*interpolfreq, endpoint= True) + curvyf = interpol(peakshape) + curvy= curvyf(xnew) + #px = p.cutsize/2 * 4 + #left = px - (5*4) + #plt.plot(xnew, curvy) + #x_0 = optimize.fsolve(curvyf, 1.0) + # f = interp1d(x, y) + # f2 = interp1d(range(width), data[x:x+width], kind='cubic') + ##xnew = np.linspace(0, width-1, num = width*4, endpoint = True) + ##print(xnew) + # plt.plot(xnew,f2(xnew)) + ##print("show") + #plt.show + trx = (p.cutsize/2 - (p.x - tx) ) + if trx >0 : + xstart = trx + else: + xstart = 0 + # #print('pkx: ', p.x, 'ltrx: ', p.ltr.x) + # #print('trx in intpol', x) + x = xstart + if curvyf(x) < 0: + left = 0 + right= 0 + while(x < width-1 and curvyf(x) < 0) : + left = x + # #print(curvyf(x)) + x+=0.25 + right = x + # #print('x: ', x , 'left, right: ', curvyf(left), curvyf(right)) + x = left+(1-curvyf(right)/(curvyf(right)-curvyf(left)))*1/interpolfreq + # #print(x) + else: + x = 0 + # #print(x_int) + # plt.scatter(xstart, curvyf(xstart), marker = 'x', s=150, zorder=2, linewidth=2, color='red') + # plt.scatter(x, curvyf(x), marker='x', s=150, zorder=2, linewidth=2, color='black') + # plt.show + # #print(x_int) + #p.relcutcenter = (p.ltr.x + x_int)-p.x + ##print('cent',p.relcutcenter) + #return (p.ltr.x + x_int)-p.x + + # while(data[x]>0) + else: + x= 0 + + return x + +def relcutarray(peaks): + return np.vectorize(lambda peak: peak.relcutcenter)(peaks) + +def xarray(peaks): + if len(peaks)>0: + peakx = np.vectorize(lambda peak: peak.x)(peaks) + return peakx + else: return [] + +def yarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.y)(peaks) + else: return [] + +def heightarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.height)(peaks) + else: return [] + +def clarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.cl)(peaks) + else: return [] +def pcclarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.pccl)(peaks) + else: return [] + +def peakxarray( ): + peakx = np.empty([len]) + peakx = np.vectorize(lambda peak: peak.x)(peaks) + return peakx + +def peakyarray( ): + peaky= np.empty([len]) + return np.vectorize(lambda peak: peak.y)(peaks) + + +def classify( ): + #template = peaks[0] + meanfit = np.mean(np.vectorize(fit, otypes=[object])(template,peaks)) + for p in peaks: + if fit(template,p) < meanfit: + # #print('classified ', fit(template,p) , ' meanfit: ' , meanfit) + p.currentclass = 1 + +def classifyhiker(template, peaks): + meanfit = np.mean(np.vectorize(fitinterpol2, otypes=[object])(template,peaks)) + #toclassify = peaks.tolist() + firstnot = 0 + for c in range(1,5): + first = True + template = peaks[firstnot] + for i, p in enumerate(peaks[firstnot:]): + if p.currentclass == 0: + if fitinterpol2(template,p) < meanfit: + # 
#print('peak number ' , i, 'classified as ', c, fit(template,p) , ' meanfit: ' , meanfit) + p.currentclass = c + template = p + elif first == True: + # #print('peak number ' , i, 'classified as First! ', c, fit(template,p) , ' meanfit: ' , meanfit) + firstnot = i + first = False + else: + None + ##print('peak number ' , i, 'classified as not classified!', fit(template,p) , ' meanfit: ' , meanfit) + return peaks + + + # def Templatefitnext( , number, templnum): + # for p in peaks: + # if fit(peaks[templnum], p) < fitparameter: + +def cut_snippets(data, peaklist, rnge): + snippets = [] + positions = xarray(peaklist) + heights = heightarray(peaklist) + for pos in positions: + snippets.append(data[(pos+rnge[0]):(pos+rnge[1])]) + scaledsnips = np.empty_like(snippets) + for i, snip in enumerate(snippets): + top = -rnge[0] + # plt.plot(snip) + scaledsnips[i] = snip * 1/heights[i] + #plt.plot(scaledsnips[i]) + # print('plted') +# plt.show() + #print('1') + alignedsnips = np.empty((len(snippets), (rnge[1]-rnge[0])*10-30-10)) + standardized = np.empty((len(snippets), (rnge[1]-rnge[0])*10-10)) + intfact = 10 + for i, snip in enumerate(scaledsnips): + if len(snip) < ((rnge[1]-rnge[0])): + if i == 0: + snip =np.concatenate([np.zeros([((rnge[1]-rnge[0]) - len(snip))]),np.array(snip)]) + if i == len(scaledsnips): + snip = np.concatenate([snip, np.zeros([((rnge[1]-rnge[0])-len(snip))])]) + else: + # print('this') + snip = np.zeros([(rnge[1]-rnge[0])]) + interpoled_snip = interpol(snip)(np.arange(0, len(snip)-1, 1/intfact)) if len(snip) > 0 else np.zeros([(rnge[1]-rnge[0]-1)*intfact ]) #interpolfactor 10 + + intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) + if intsnipheight == 0: + intsnipheight = 1 + interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight + standardized[i] = interpoled_snip + #print('2') + mean = np.mean(standardized, axis = 0) + #plt.plot(mean) +# plt.show() + #plt.plot(mean[10*-rnge[0]-10*5:-10*rnge[1]+21]) +# plt.show() + meantop = np.argmax(mean) + for i, snip in enumerate(standardized): + #plt.show() + interpoled_snip = snip #standardized[i] + cc = crosscorrelation(interpoled_snip[15:-15], mean) + #cc = crosscorrelation(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31], mean[10*-rnge[0]-10*7:-10*rnge[1]+31]) + #plt.plot(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31]) + #top = np.argmax(interpoled_snip) + #offset = meantop - top + #if not(-15 <= offset <= 15): offset = 0 + offset = -15 + np.argmax(cc) + interpoled_snip = interpoled_snip[15-offset:-15-offset] if offset != -15 else interpoled_snip[30:] + #print(offset) + #plt.plot(interpoled_snip) + if len(interpoled_snip[~np.isnan(interpoled_snip)])>0: + alignedsnips[i] = interpoled_snip + #plt.show() + # print('3') + return snippets, alignedsnips + + + +def fit(templ, peak): + fit = np.sum(np.square(templ.cut - peak.cut)) + return fit + +def fitinterpol2(templ,peak): + t = templ + p = peak + if p.real and t.real: + fit = np.sum(np.square(t.cutaligned-p.cutaligned)) + else: + fit = 0 + return fit + + + +def fitinterpol( templ, peak): + t = templ + p = peak + if p.real: + centerp = cutcenter(p) + centert = cutcenter(t) + shiftp = centerp-p.cutsize/2 + shiftt = centert-t.cutsize/2 + + if shiftp > -5: + shiftp = min(5, 5+centerp-p.cutsize/2) + else: shiftp = 0 + + if shiftt > -5: + shiftt = min(5, 5+centert-t.cutsize/2) + else: shiftt = 0 + + xnew = np.linspace(0,p.cutsize-11, (p.cutsize-1) * 4,endpoint = True) + #peak_interpoled = interpol(p.cut)(xnew) + #plt.plot(xnew, 
interpol(p.cut)(xnew+shift)) + # #print(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp)) + fit = np.sum(np.square(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp))) + else: + fit = 0 + return fit + + +def plotdata(peaks, data): + x = xarray(peaks) + y = yarray(peaks) + plt.plot(range(len(data)),data) + plt.plot(x, y, '.r', ms=20) + #for p in peaks: + # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) + # plt.plot(tr, data[tr], '.g', ms=20) + plt.show() + + +def plotdatabyx(peaksx, data): + x = peaksx + y = data[peaksx] + plt.plot(range(len(data)),data) + plt.plot(x, y, '.r', ms=20) + plt.show() + #for p in peaks: + # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) + # plt.plot(tr, data[tr], '.g', ms=20) + +def plotpeak(peaks): + #plt.plot(peaks), cutpeaks) #bei betrachtung aller blocks zu groß! + for p in peaks: + plt.plot(range(p.cutsize),p.cut) + #plt.plot(pk, x[pk] , '.r', ms=20) + plt.show() + + +def periodicinclass(peaks, cl): + noiselist = [] + classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaks) + peaks = xarray(peaks) + peaks = peaks[:][classlist == cl] + periodic = [] + periodiccollector = [] + error2 = [] + isperiodic = True + b=1 + c=2 + ctofar = False + compdif = 0 + dif = 0 + count = 1 + foundtriple = False + next = 0 + for i in range(len(peaks)-1): + if i != next: continue + # #print(i, 'foundtriple', foundtriple) + error2 = [] + b=1 + c=0 + A = peaks[i] + B = peaks[i+b] + compdif = dif + while foundtriple == True and count <= 3 and i+1 < len(peaks)-1: + while B-A < compdif*1.5 and i+b+1 < len(peaks)-1: + # #print('newdif: ', B-A, 'olddif:' , dif) + if abs((B-A) - compdif) < compdif*0.4: + error2.append(abs((B-A) - dif)) + b+=1 + B = peaks[i+b] + if len(error2) > 0: + bestB = error2.index(min(error2)) + B = peaks[i+1 + bestB] + periodic.append(B) + dif = 0.5*(dif + (B-A)) + # #print('match found') + b = 1+bestB + break + else: + count+=1 + compdif = dif*count + else: + if foundtriple == True: + # #print('no further match found, ') + isperiodic = False + + + + + while foundtriple == False and i+c< len(peaks)-1: + while i+c < len(peaks)-1: + A = peaks[i] + B = peaks[i+b] + C = peaks[i+c] + dif1 = B - A + dif2 = C - B + if (C-B > (B-A)*1.5): + break + if abs(dif1 - dif2) < dif1*0.4: + error2.append(abs(dif1-dif2)) + c +=1 + #C = peaks[i+c] # C weiterlaufenlassen, bis zu weit + else: + if len(error2) == 0: + # #print('no triple found') + isperiodic = False + if len(error2) > 0: + bestC = error2.index(min(error2)) + C = peaks[i+2 + bestC] + c = 2+ bestC + periodic.extend((A,B,C)) + dif1 = B - A + dif2 = C - B + # #print('dif1: ', dif1, 'dif2: ', dif2) + dif = 0.5*(dif2+dif1) + foundtriple = True + # #print('triple found', i+c, 'dif : ', dif) + else: + error2 = [] # B weiterlaufen lassen, C reset auf B+1 + b +=1 + c = b+1 + + if isperiodic == False: + if len(periodic) > 3: + periodiccollector.append(periodic) + isperiodic = True + periodic = [] + if c!=0: + next = i+c + else: + next = i+b + if len(periodiccollector) > 0: + # for i in range(len(periodiccollector)): + # #print('collector ', i, periodiccollector[i]) + return periodiccollector + else: + #print('no periodicity found') + return [] + + + +def noisediscard(peaklist, tsh_n, ultimate_threshold): + detected_noise = False + ##print('noisetsh: ', tsh_n) + for p in peaklist.list: + + if p.height < tsh_n or p.height < ultimate_threshold: + p.noise = True + detected_noise = True + peaklist.list = 
peaklist.list[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaklist.list) == False] + # #print(peaks) + # for cl in classlist: + # diff = np.vectorize(lambda peak: peak.x, otypes=[object])(peaks[:][classlist == cl]) + # meandiff = np.mean(diff) + # msecompare = np.mean(np.square(diff-(diff*0.8))) + # mse = np.mean(np.square(diff-meandiff)) + # if mse > msecompare: + # noiselist.append(cl) + # for p in peaks: + #if p.cl in noiselist: + # if p.height < 0.1: + # p.noise = True + # peaks = peaks[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaks) == False] + # return peaks + return detected_noise + + +def plotPCclasses_ref(peaks, data): + plt.plot(range(len(data)),data, color = 'black') + print(peaks) + classlist = np.array(peaks[3],dtype = 'int') + cmap = plt.get_cmap('jet') + colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) + np.random.seed(22) + np.random.shuffle(colors) + colors = [colors[cl] for cl in np.unique(classlist)] + print('classlist', np.unique(classlist)) + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) + # x=0 +# if len(classlist)>0: + # #print(classlist) + # #print('classes: ' , np.unique(classlist)) + #from collections import Counter + #count = Counter(classlist) + # #print('longest class: ', count.most_common()[0]) + for num, color in zip(np.unique(classlist), colors): + if num == -1 : + color = 'black' + peaksofclass = peaks[:,classlist == num] + #xpred = linreg_pattern(peaksofclass[0:3]) + #for p in peaksofclass[0:3]: + # #print(p.x) + ##print(xpred, peaksofclass[3].x) + #if len(peaksofclass) > 1000: + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) + #else: + print(num) + plt.plot(peaksofclass[0], peaksofclass[1], '.', color = color, ms =20) + #plt.scatter(peaks[0], peaks[2]) + # for p in peaks: + # plt.text(p.x, p.y, p.num) + #plt.show() + + print('show pcclasses') + plt.show() + plt.close() + +def plotampwalkclasses_refactored(peaks, data): + plt.plot(range(len(data)),data, color = 'black') + classlist = np.array(peaks[3],dtype=np.int) + cmap = plt.get_cmap('jet') + colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) + np.random.seed(22) + np.random.shuffle(colors) + colors = [colors[cl] for cl in np.unique(classlist)] + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) + # x=0 +# if len(classlist)>0: + # #print(classlist) + # #print('classes: ' , np.unique(classlist)) + #from collections import Counter + #count = Counter(classlist) + # #print('longest class: ', count.most_common()[0]) + for cl, color in zip(np.unique(classlist), colors): + peaksofclass = peaks[:,classlist == cl] + #xpred = linreg_pattern(peaksofclass[0:3]) + #for p in peaksofclass[0:3]: + # #print(p.x) + ##print(xpred, peaksofclass[3].x) + + #if len(peaksofclass) > 1000: + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) + #else: + + plt.plot(peaksofclass[0],peaksofclass[1], '.', color = color, ms =20) + plt.scatter(peaksofclass[0], peaksofclass[2]) + # for p in peaks: + # plt.text(p.x, p.y, p.num) + plt.show() + + # plt.show() + plt.close() + + +def crosscorrelation(sig, data): + autocorr = signal.fftconvolve(data, sig[::-1], mode='valid') + return autocorr + +def plottemplatefits(data, peaks, tr, templnum): + # + plotdata(peaks, data, tr) + plt.plot(range(len(data)),data) + classes = np.vectorize(lambda peak: peak.currentclass, otypes=[object])(peaks) + class1 = peaks[:][classes == 1 ] + if len(class1) > 0: + 
plt.plot(xarray(class1), yarray(class1), '.r', ms=20) + class2 = peaks[:][classes == 2 ] + if len(class2) > 0: + plt.plot(xarray(class2), yarray(class2), '.g', ms=20) + class3 = peaks[:][classes == 3 ] + if len(class3) > 0: + plt.plot(xarray(class3), yarray(class3), '.c', ms=20) + class4 = peaks[:][classes == 4 ] + if len(class4) > 0: + plt.plot(xarray(class4), yarray(class4), '.y', ms=20) + + # for p in peaks: # <-- + # plt.text(p.x , p.y, p.num) + + # plt.plot(tr, data[tr], '.g', ms=20) + plt.show() + +def linreg_pattern(peaks): + from sklearn import datasets, linear_model + from sklearn.metrics import mean_squared_error, r2_score + + peaksx = xarray(peaks) + peaksx = peaksx.reshape(-1,1) + #peaksh = heightarray(peaks) + #peakx = peak.x + # Create linear regression object + regr = linear_model.LinearRegression() + numbers = np.arange(len(peaks)).reshape(-1,1) + # Train the model using the training sets + regr.fit(numbers, peaksx) + + # Make predictions using the testing set + peakx_pred = regr.predict(len(peaks)) + # # The coefficients + # #print('Coefficients: \n', regr.coef_) + # # The mean squared error + # #print("Mean squared error: %.2f" + # % mean_squared_error(diabetes_y_test, diabetes_y_pred)) + # # Explained variance score: 1 is perfect prediction + # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) + + + # Plot outputs + #plt.scatter(peaksx, peaksh, color='black') + #plt.scatter(peakx, peakh_pred, color='blue') + + #plt.xticks(()) + #plt.yticks(()) + + # plt.show() + + return peakx_pred + +def linreg(peaks, peak): + from sklearn import datasets, linear_model + from sklearn.metrics import mean_squared_error, r2_score + + peaksx = xarray(peaks) + peaksx = peaksx.reshape(-1,1) + peaksh = heightarray(peaks) + peakx = peak.x + # Create linear regression object + regr = linear_model.LinearRegression() + + # Train the model using the training sets + regr.fit(peaksx, peaksh) + + # Make predictions using the testing set + peakh_pred = regr.predict(peakx) + + # # The coefficients + # #print('Coefficients: \n', regr.coef_) + # # The mean squared error + # #print("Mean squared error: %.2f" + # % mean_squared_error(diabetes_y_test, diabetes_y_pred)) + # # Explained variance score: 1 is perfect prediction + # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) + + + # Plot outputs + #plt.scatter(peaksx, peaksh, color='black') + #plt.scatter(peakx, peakh_pred, color='blue') + + #plt.xticks(()) + #plt.yticks(()) + + # plt.show() + + + + return peakh_pred + +def wp_transform(x): + import pywt + wp = pywt.WaveletPacket(data=x, wavelet='haar', mode='symmetric') + print('maxlevel: ', wp[''].maxlevel) + return (np.array([node.data for node in wp.get_level(wp[''].maxlevel, 'freq')])).flatten() + +def wpfeats(snips): + size = len(wp_transform(snips[0])) + wp = np.empty([len(snips), size]) + for i, snip in enumerate(snips): + print(wp_transform(snip)) + wp[i] = (wp_transform(snip)) + #wp = wp.T + print(wp[0]) + wpcoef = wp.T + print(wp[0]) + from sklearn.preprocessing import StandardScaler + wpcoef = StandardScaler().fit_transform(wpcoef) + coeffvalues = [] + for coeff in wpcoef: + stat, crit, sig = stats.anderson(coeff, dist = 'norm') + # coeffvalues.append(stat) + coeffvalues.append(np.sum(np.abs(coeff))) + coeffvalues = np.array(coeffvalues) + coeffs = np.argsort(coeffvalues)[::-1][:10] + print(coeffvalues[coeffs]) + return wp.T[coeffs] + + + + +def pc(cutsnippets, peaklist): + # (observations, features) matrix + M = np.empty([len(cutsnippets), 
len(cutsnippets[0])]) + for i, snip in enumerate(cutsnippets): + M[i] = snip[:] + from sklearn.preprocessing import StandardScaler + StandardScaler().fit_transform(M) + # #print(M.shape, ' Mshape') + # singular value decomposition factorises your data matrix such that: + # + # M = U*S*V.T (where '*' is matrix multiplication) + # + # * U and V are the singular matrices, containing orthogonal vectors of + # unit length in their rows and columns respectively. + # + # * S is a diagonal matrix containing the singular values of M - these + # values squared divided by the number of observations will give the + # variance explained by each PC. + # + # * if M is considered to be an (observations, features) matrix, the PCs + # themselves would correspond to the rows of S^(1/2)*V.T. if M is + # (features, observations) then the PCs would be the columns of + # U*S^(1/2). + # + # * since U and V both contain orthonormal vectors, U*V.T is equivalent + # to a whitened version of M. + + U, s, Vt = np.linalg.svd(M, full_matrices=False) + V = Vt.T + + # PCs are already sorted by descending order + # of the singular values (i.e. by the + # proportion of total variance they explain) + S = np.diag(s) + # PC = (s*V) + # PCs: + #print(U.shape) + #print(S.shape) + #print(V.shape) + #print(s[0], U[0,:]) + + #PC1 = (s[0] * U[:,0]) + #PC2 = (s[1] * U[:,1]) + #for i, p in enumerate(peaklist): + # p.pc1 = PC1[i] + # p.pc2 = PC2[i] + + #mu = peaks.mean(axis=0) + #fig, ax = plt.subplots() + #ax.scatter(xData, yData) + #for axis in U: + # start, end = mu, mu + sigma * axis + # ax.annotate( + # '', xy=end, xycoords='data', + # xytext=start, textcoords='data', + # arrowprops=dict(facecolor='red', width=2.0)) + #ax.set_aspect('equal') + #plt.show() + + + # if plot_steps: + # plt.scatter(PC1, PC2) + # plt.show() + + # PCData1 = (U[:,0]*M) + # PCData2 = (U[:,1]*M) + # plt.scatter(PCData1, PCData2) + # plt.show() + + #plt.scatter(U[:,0],U[:,1]) + #plt.show() + #print('done') + #return PC + + # if we use all of the PCs we can reconstruct the noisy signal perfectly + #Mhat = np.dot(U, np.dot(S, V.T)) + #print('Using all PCs, MSE = %.6G' %(np.mean((M - Mhat)**2))) + + #plt.show() + return S@U.T + +def gettime(x, samplerate, starttime): + startm = int(starttime[-2:]) + starth = int(starttime[:-2]) + seconds = x/samplerate + m, s = divmod(seconds, 60) + m = m + startm + h, m = divmod(m, 60) + h = h+starth + return "%d:%02d:%02d" % (h, m, s) + +def connect_blocks(oldblock): + newblock = Peaklist([]) + newblock.lastofclass = oldblock.lastofclass + newblock.lastofclassx = oldblock.lastofclassx + newblock.classesnearby = oldblock.classesnearby + newblock.classesnearbypccl = oldblock.classesnearbypccl + newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx] + newblock.classamount = oldblock.classamount + return newblock + ##print('classesnearbyx! 
old, new ' , oldblock_len,oldblock.classesnearbyx , newblock.classesnearbyx) + +if __name__ == '__main__': + main() + + + +# deleted Code, but unsure if really want to delete: + + #nix #print( b.data_arrays) + + # for cl in np.unique(cllist): + + # currentfish_x = x[:][cllist == cl] + # currentfish_y = y[:][cllist == cl] + # currentfish_h = x[:][cllist == cl] + + + #nix try: + #nix xpositions[cl] = b.create_data_array("f%d_eods" %cl, "spiketimes", data = currentfish_x) + #nix xpositions[cl].append_set_dimension() + #nix # thisfish_eods = b.create_multi_tag("f%d_eods_x"%cl, "eods.position", xpositions[cl]) + #nix # thisfish_eods.references.append(nixdata) + #nix except nix.pycore.exceptions.exceptions.DuplicateName: + #nix + #nix xpositions[cl].append(currentfish_x) + + + #thisfish_eods.create_feature(y, nix.LinkType.Indexed) + #b.create_multi_tag("f%d_eods_y"%cl, "eods.y", positions = y) + #b.create_multi_tag("f%d_eods_h"%cl, "eods.amplitude", positions = h) + #thisfish_eods.create_feature + + + + +# in analyseEods +# in analyseEods classlist = eods[3] #np.vectorize(lambda peak: peak.cl, otypes=[object])(worldpeaks.list) +# in analyseEods fishclass = {} +# in analyseEods #print('classlist: ', classlist) +# in analyseEods # #print('Classes at end: ', np.unique(classlist)) +# in analyseEods +# in analyseEods +# in analyseEods fishes = {} +# in analyseEods for num in np.unique(classlist): +# in analyseEods fishes[num] = eods[:,:][: , classlist == num] +# in analyseEods +# in analyseEods +# in analyseEods +# in analyseEods +# in analyseEods fishes = fill_hidden_3(fishes) # cl-dict : x y z -dict +# in analyseEods #maxlencl = max(fishes, key=lambda k: fishes[k]['x'][-1]-fishes[k]['x'][0]) +# in analyseEods +# in analyseEods fishes, weirdparts = fill_holes(fishes) +# in analyseEods fishes, weirdparts = fill_holes(fishes) +# in analyseEods +# in analyseEods for cl in np.unique(classlist): +# in analyseEods isi = [isi for isi in np.diff(fishes[cl]['x'])] +# in analyseEods fishes[cl][3]= isi +# in analyseEods + + +#npFish +#npFish npFishes = {} +#npFish fishfeaturecount = len(fishes[cl]) +#npFish for cl in np.unique(classlist): +#npFish npFishes[cl]= np.zeros([fishfeaturecount, len(fishes[cl]['x'])]) +#npFish for i, feature in enumerate(['x', 'y', 'h', 'isi']): #enumerate(fishes[cl]): +#npFish if feature == 'isi': +#npFish fishes[cl][feature].append(fishes[cl][feature][-1]) +#npFish # #print(feature, cl) +#npFish npFishes[cl][i] = np.array(fishes[cl][feature]) +#npFish # #print(npFishes[classlist[0]][0]) +#npFish # #print(npFishes[classlist[0]][2]) +#npFish # #print(npFishes[classlist[0]][3]) +#npFish #np.savetxt('worldpeaks_x_y_cl_2', (x,y,cl, isi), fmt="%s") +#npFish +#npFish np.set_printoptions(threshold=np.nan) +#npFish +#npFish for i, cl in enumerate(np.unique(classlist)): #Neue Klassennamen! +#npFish x = npFishes[cl][0] +#npFish y = npFishes[cl][1] +#npFish h = npFishes[cl][2] +#npFish isi = npFishes[cl][3] +#npFish +#npFish np.savetxt(filename[:-4]+'Fish_xyhisi_cl%d' % i, npFishes[cl], fmt="%s") +#npFish +#npFish +#npFish + + + + + + # / TODO: Peakclassifikator bei weit wegliegenden klassen? Done + # / TODO: Class2 implementation auf class linreg übertragen Done - Doof + # TODO: Klassen zusammenfuegen/ Noise zusammenfuegen + # - Wenn last 3 und first 3 zueinander passen in 1. Amplitude und 2. Periode (falls peaks) oder 2. randomzeugs? - Noiseerkennung und 2. 
Amplitude + # TODO: Klassen filtern auf Patternausreißer + # diff --git a/thunderfish/DextersThunderfishAddition/analyseDexRefactorShort.py b/thunderfish/DextersThunderfishAddition/analyseDexRefactorShort.py new file mode 100644 index 00000000..11acef89 --- /dev/null +++ b/thunderfish/DextersThunderfishAddition/analyseDexRefactorShort.py @@ -0,0 +1,1995 @@ +# Script to detect and classify EODs in recordings of weakly electric pulse +# fish, Dexter Früh, 2018 +# # it is suggested to save the recording in +# workingdirectory/recording/recording.WAV + +# results will be saved in workingdirectory/recording/ +# +# input: +# - [Recorded Timeseries] recording.WAV +# outputs(optional): +# - [Detected and Classified EODs] +# (Numpy Array with Shape (Number of EODs, 4 (Attributes of EODs)), +# with the EOD-Attributes +# - x-location of the EOD +# (time/x-coordinate/datapoint in recording) +# - y-location of the EOD +# (Amplitude of the positive peak of the pulse-EOD) +# - height of the EOD(largest distance between peak and through in the EOD) +# - class of the EOD +# eods_recording.npy +# - [plots of the results of each analyse step for each +# analysepart (timeinterval of length = deltat) of the recording] +# +# required command line arguments at function call +# - save : if True, save the results to a numpy file (possibly +# overwrite existing) +# - plot : if True, plot results in each analysestep +# - new : if True, do a new analysis of the recording, even if there +# is an existing analyzed .npy file with the right name. +# +# call with: +# python3 scriptname.py save plot new (starttime endtime[sec] for only +# partial analysis) +# +# other parameters are behind imports and some hardcoded at the relevant +# codestep +import sys +import numpy as np +import copy +from scipy.stats import gmean +from scipy import stats +from scipy import signal +from scipy import optimize +import matplotlib +from fish import ProgressFish +import matplotlib.pyplot as plt +from thunderfish.dataloader import open_data +from thunderfish.peakdetection import detect_peaks +from scipy.interpolate import interp1d +from scipy.signal import savgol_filter +from collections import deque +import ntpath +import nixio as nix +import time +import os +from shutil import copy2 + +from ownDataStructures import Peak, Tr, Peaklist +import DextersThunderfishAddition as dta + +from IPython import embed +# parameters for the analysis + +deltat = 30.0 # seconds of buffer size +thresh = 0.04 # minimal threshold for peakdetection +peakwidth = 20 # width of a peak and minimal distance between two EODs +# basic parameters for thunderfish.dataloader.open_data +verbose = 0 +channel = 0 +# timeinterval to analyze other than the whole recording +#starttime = 0 +#endtime = 0 +#timegiven = False + +def main(): # analyse_dex.py filename save plot new (optional starttime endtime [sec]) + home = os.path.expanduser('~') + os.chdir(home) + # defaults for optional arguments + timegiven = False + plot_steps = False + # parse command line arguments - filepath, save, plot, new (, starttime, + filepath = sys.argv[1] + save = int(sys.argv[2]) + plot_steps = int(sys.argv[3]) + new = int(sys.argv[4]) + if len(sys.argv[:])>5: + timegiven = True + starttime = int(sys.argv[5]) + endtime = int(sys.argv[6]) + #print(starttime, endtime) + peaks = np.array([]) + troughs = np.array([]) + cutsize = 20 + maxwidth = 50 #10 + ultimate_threshold = thresh+0.01 + filename = path_leaf(filepath) + proceed = input('Currently operates in home directory. 
If given a pulsefish recording filename.WAV, then a folder filename/ will be created in the home directory and all relevant files will be stored there. continue? [y/n] ').lower() + if proceed == 'n': + quit() + elif proceed == 'y': + pass + #do something + elif proceed != 'y': + quit() + datasavepath = filename[:-4] + print(datasavepath) + eods_len = 0 + ### ## starting analysis + if new == 1 or not os.path.exists(filename[:-4]+"/eods5_"+filename[:-3]+"npy"): + ### ## import data + with open_data(filepath, channel, deltat, 0.0, verbose) as data: + if save == 1 or save == 0: + if not os.path.exists(datasavepath): + os.makedirs(datasavepath) + copy2(filepath, datasavepath) + samplerate = data.samplerate + ### ## split datalength into smaller blocks + nblock = int(deltat*data.samplerate) + if timegiven == True: + parttime1 = starttime*samplerate + parttime2 = endtime*samplerate + data = data[parttime1:parttime2] + if len(data)%nblock != 0: + blockamount = len(data)//nblock + 1 + else: + blockamount = len(data)//nblock + bigblock = [] + ### ## output first (0%) progress bar + print('blockamount: ' , blockamount) + progress = 0 + print(progress, '%' , end = " ", flush = True) + fish = ProgressFish(total = blockamount) + olddatalen = 0 + startblock = 0 + ## iterating through the blocks, detecting peaks in each block + for idx in range(startblock, blockamount): + ### ## print progress + if progress < (idx*100 //blockamount): + progress = (idx*100)//blockamount + progressstr = 'Partstatus: '+ str(0) + ' '*2 + ' % (' + '0' + ' '*4+ '/' + '?'+' '*4+ '), Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + progressstr = 'Partstatus: '+ 'Part ' + '0'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + datx = data[idx*nblock:(idx+1)*nblock] + # ---------- analysis -------------------------------------------------------------------------- + # step1: detect peaks in timeseries + pk, tr = detect_peaks(datx, thresh) + troughs = tr + # continue with analysis only if multiple peaks are detected + if len(pk) > 2: + peaks = dta.makeeventlist(pk,tr,datx,peakwidth) + #dta.plot_events_on_data(peaks, datx) + peakindices, peakx, peakh = dta.discardnearbyevents(peaks[0],peaks[1],peakwidth) + peaks = peaks[:,peakindices] + progressstr = 'Partstatus: '+ 'Part ' + '1'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + if len(peaks) > 0: + ### ## connects the current part with the one that came before, to allow for a continuous analysis + if idx > startblock: + print('peaklist.len: ',peaklist.len) + peaklist = dta.connect_blocks(peaklist) + print(peaklist.len, peaklist.classesnearbyx) + else: + peaklist = Peaklist([]) + aligned_snips = dta.cut_snippets(datx,peaks[0], 15, int_met = "cubic", int_fact = 10,max_offset = 1.5) + progressstr = 'Partstatus: '+ 'Part ' + '2'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + # calculates principal components + pcs = dta.pc(aligned_snips)#pc_refactor(aligned_snips) + #print('dbscan') + order = 5 + minpeaks = 3 if deltat < 2 else 10 + labels = dta.cluster_events(pcs, peaks, order, 0.4, minpeaks, False, olddatalen, method = 'DBSCAN') + #print('peaks before align', peaks) + peaks = np.append(peaks,[labels], axis = 0) + #dta.plot_events_on_data(peaks, datx) + olddatalen = len(datx) + num = 1 + progressstr = 'Partstatus: '+ 'Part ' + '3'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + # classifies the peaks using the data from the clustered classes and a simple 
amplitude-walk which classifies peaks as different classes if their amplitude is too far from any other classes' last three peaks + #peaks[3]=[-1]*len(peaks[3]) + if idx > startblock: + dta.alignclusterlabels(labels, peaklist, peaks,data=datx) + print(peaklist.classesnearby) + peaks, peaklist = dta.ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude + print(peaklist.classesnearby) + #join_count=0 + # while True and joincc(peaklist, peaks) == True and join_count < 200: + # join_count += 1 + # continue + # discards all classes that contain less than mincl EODs + minlen = 6 # >=1 + peaks = dta.discard_short_classes(peaks, minlen) + if len(peaks[0]) > 0: + peaks = dta.discard_wave_pulses(peaks, datx) + # plots the data part and its detected and classified peaks + if plot_steps == True: + dta.plot_events_on_data(peaks, datx) + pass + # map the analyzed EODs of the buffer part to the whole + # recording + worldpeaks = np.copy(peaks) + # change peaks location in the buffered part to the location relative to the + peaklist.len = nblock + # peaklocations relative to whole recording + worldpeaks[0] = worldpeaks[0] + (idx*nblock) + thisblock_eods = np.delete(peaks,3,0) + thisblockeods_len = len(thisblock_eods[0]) + progressstr = 'Partstatus: '+ 'Part ' + '4'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + # save the peaks of the current buffered part to a numpy-memmap on the disk + if thisblockeods_len> 0 and save == 1 or save == 0: + if idx == 0: + eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='w+', shape=(4,thisblockeods_len), order = 'F') + dtypesize = 8#4 #float32 is 32bit = >4< bytes long ---changed to float64 -> 8bit + eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', offset = dtypesize*eods_len*4, shape=(4,thisblockeods_len), order = 'F') + eods[:] = thisblock_eods + eods_len += thisblockeods_len + # to clean the plt buffer... + plt.close() + # get and print the measured times of the algorithm parts for the + # current buffer + progressstr = 'Partstatus: '+ 'Part ' + '5'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + # plt.show() + # after the last buffered part has finished, save the memory mapped + # numpy file of the detected and classified EODs to a .npy file to the + # disk + eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', shape=(4,eods_len), order = 'F') + print('before final saving: print unique eodcl: ' , np.unique(eods[3])) + if save == 1: + # #print('eods', eods[3]) + path = filename[:-4]+"/" + if not os.path.exists(path): + os.makedirs(path) + if eods_len > 0: + print('Saved!') + np.save(filename[:-4]+"/eods8_"+filename[:-3]+"npy", eods) + else: + #np.save(filename[:-4]+"/eods5_"+filename[:-3]+"npy", thisblock_eods) + print('not saved') + + else: # if there already has been a certain existing result file and 'new' was set to False + print('already analyzed') + + + # not used data implementation using NIX + # Save Data + + # Needed: + # Meta: Starttime, Startdate, Length + # x, y, h, cl, difftonextinclass -> freq ? , + + # Later: Find "Nofish" + # Find "Twofish" + # Find "BadData" + # Find "Freqpeak" + # ? 
Find "Amppeak" + # + + # bigblock = np.array(bigblock) + # x=xarray(bigblock) + # y=yarray(bigblock) + # cl=clarray(bigblock) + + + #nix file = nix.File.open(file_name, nix.FileMode.ReadWrite) + #nix b = file.blocks[0] + #nix nixdata = b.data_arrays[0] + #nix cldata = [] + #nix #print(classes) + #nix #print(b.data_arrays) + #nix for i in range(len(np.unique(classes))): + #nix cldata.append(b.data_arrays[i+1]) + + + # for cl in + + # for cl in + # x = thisfish_eods + + + #nix file.close() + +def path_leaf(path): + ntpath.basename("a/b/c") + head, tail = ntpath.split(path) + return tail or ntpath.basename(head) + +def fill_hidden(fishclasses): + + fishes = fishclasses + + nohidefishes = {} + for cl in fishes: + x =[] + y = [] + h = [] + fish = fishes[cl] + # #print('fish', fish) + fishisi = calcisi(fish) + isi = fishisi[0] + for i, newisi in enumerate(fishisi): + leftpeak = fish[i] + x.append(leftpeak.x) + y.append(leftpeak.y) + h.append(leftpeak.height) + if newisi > 2.8*isi: + guessx = leftpeak.x + isi + + while guessx < leftpeak.x + newisi-0.8*isi: + + peakx = peakaround(guessx, isi*0.1, fishes) + if peakx is not None: + x.append(peakx) + y.append(leftpeak.y) + h.append(leftpeak.height) + guessx = peakx+ isi + (peakx-guessx) + + continue + break + isi = newisi + nohidefishes[cl]= {'x':x,'y':y,'h':h} + return nohidefishes + +def plotheights(peaklist): + heights = heightarray(peaklist) + x_locations = xarray(peaklist) + plt.scatter(x_locations, heights) + plt.show() + +def ploteods(eods, data): + plt.plot(range(len(data)),data, color = 'black') + classlist = eods[3] + cmap = plt.get_cmap('jet') + colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) + np.random.seed(22) + np.random.shuffle(colors) + colors = [colors[cl] for cl in np.unique(classlist)] + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) + x=0 + if len(classlist)>0: + # #print(classlist) + # #print('classes: ' , np.unique(classlist)) + from collections import Counter + count = Counter(classlist) + # #print('longest class: ', count.most_common()[0]) + for num, color in zip(np.unique(classlist), colors): + peaksofclass = eods[:,:][:, classlist == num] + #xpred = linreg_pattern(peaksofclass[0:3]) + #for p in peaksofclass[0:3]: + # #print(p.x) + ##print(xpred, peaksofclass[3].x) + + #if len(peaksofclass) > 1000: + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) + #else: + plt.plot(peaksofclass[0], peaksofclass[1], '.', color = color, ms =20) + plt.show() + +def fill_hidden_3(fishes): + + fishes = fishes + + nohidefishes = {} + for cl, fish in fishes.items(): + x =[] + y = [] + h = [] + # fish = fishes[cl] passt net, fishes is np.array mit (cl, (xyh)) + fishisi = np.diff(fish[0]) + isi = fishisi[0] + for i, newisi in enumerate(fishisi): + leftpeak = i + x.append(fish[0][i]) + y.append(fish[1][i]) + h.append(fish[2][i]) + # #print(cl, fish[0][i], isi, newisi) + if newisi > 2.8*isi: + guessx = fish[0][i] + isi + + while guessx < fish[0][i] + newisi-0.8*isi: + + peakx = peakaround3(guessx, isi*0.1, fishes) + if peakx is not None: + # #print(jup) + x.append(peakx) + y.append(fish[1][i]) + h.append(fish[2][i]) + guessx = peakx+ isi + (peakx-guessx) + + continue + break + isi = newisi + nohidefishes[cl]= {'x':x,'y':y,'h':h} + + return nohidefishes + +def peakaround2(guessx, interval, fishes): + found = False + for cl, fish in fishes.items(): + for px in fish['x']: + distold = interval + if px < guessx-interval: + continue + # #print('in area', guessx-interval) + if 
guessx-interval < px < guessx+interval: + found = True + dist = px-guessx + if abs(dist) < abs(distold): + distold = dist + if px > guessx+interval: + if found == True: + # #print(guessx, dist) + return guessx + dist + else: break + return None + +def peakaround3(guessx, interval, fishes): + found = False + for cl, fish in fishes.items(): + for px in fish[0]: + distold = interval + if px < guessx-interval: + continue + # #print('in area', guessx-interval) + if guessx-interval < px < guessx+interval: + found = True + dist = px-guessx + if abs(dist) < abs(distold): + distold = dist + if px > guessx+interval: + if found == True: + # #print(guessx, dist) + return guessx + dist + else: break + return None + +def peakaround(guessx, interval, fishes): + found = False + for cl, fish in fishes.items(): + for peak in fish: + + distold = interval + if peak.x < guessx-interval: + continue + # #print('in area') + if guessx-interval < peak.x < guessx+interval: + found = True + dist = peak.x-guessx + if abs(dist) < abs(distold): + distold = dist + if peak.x > guessx+interval: + if found == True: + # #print(guessx, dist) + return guessx + dist + else: break + return None + +def fill_holes(fishes): #returns peakx, peaky, peakheight # Fills holes that seem to be missed peaks in peakarray with fake (X/Y/height)-Peaks + retur = {} + lost = {} + for cl, fish in fishes.items(): + fishisi = np.diff(fish['x']) + mark = np.zeros_like(fishisi) + isi = 0 + ##print('mark', mark) + # #print('fishisi' , fishisi) + #find zigzag: + c=0 + c0= 0 + n=0 + for i, newisi in enumerate(fishisi): + if abs(newisi - isi)>0.15*isi: + if (newisi > isi) != (fishisi[i-1] > isi): + c+=1 + # #print(abs(newisi - isi), 'x = ', fish[i].x) + c0+=1 + elif c > 0: + n += 1 + if n == 6: + if c > 6: + # print ('zigzag x = ', fish['x'][i-6-c0], fish['x'][i-6]) + mark[i-6-c0:i-6]= -5 + c = 0 + c0=0 + n = 0 + + #if c > 0: + # #print(i, c) + # if c == 6: + # #print('zigzag!') + isi = newisi + isi = 0 + for i, newisi in enumerate(fishisi): + ##print('mark: ' , mark) + if mark[i] == -5: continue + if i+2 >= len(fishisi): + continue + if (2.2*isi > newisi > 1.8*isi) and (1.5*isi>fishisi[i+1] > 0.5*isi) : + mark[i] = 1 + isi = newisi + # #print('found 1!' 
, i) + elif (2.2*isi > newisi > 1.8*isi) and (2.2*isi> fishisi[i+1] > 1.8*isi) and (1.5*isi > fishisi[i+2] > 0.5*isi): + mark[i] = 1 + isi = isi + elif 3.4*isi > newisi > 2.6*isi and 1.5*isi > fishisi[i+1] > 0.5*isi: + mark[i] = 2 + + elif (0.6* isi > newisi > 0): + # #print('-1 found', i ) + if mark[i] ==0 and mark[i+1] ==0 and mark[i-1]==0 : + # isi = newisi + # continue + # #print('was not already set') + if fishisi[i-2] > isi < fishisi[i+1]: + mark[i] = -1 + # #print('-1') + elif isi > fishisi[i+1] < fishisi[i+2]: + mark[i+1] = -1 + # #print('-1') + isi = newisi + filldpeaks = [] + x = [] + y = [] + h = [] + x_lost=[] + y_lost=[] + h_lost=[] + # #print('filledmarks: ', mark) + for i, m in enumerate(mark): + if m == -1 : + # #print('-1 at x = ', fish['x'][i]) + continue + if m == -5: + x_lost.append(fish['x'][i]) + y_lost.append(fish['y'][i]) + h_lost.append(fish['h'][i]) + x.append(fish['x'][i]) + y.append(fish['y'][i]) + h.append(fish['h'][i]) + continue + x.append(fish['x'][i]) + y.append(fish['y'][i]) + h.append(fish['h'][i]) + if m == 1: + # #print('hofly added peak at x = ' , fish['x'][i]) + x.append(fish['x'][i] + fishisi[i-1]) + y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) + h.append(0.5*(fish['h'][i]+fish['h'][i+1])) + elif m== 2: + x.append(fish['x'][i] + fishisi[i]) + y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) + h.append(0.5*(fish['h'][i]+fish['h'][i+2])) + x.append(fish['x'][i] + 2*fishisi[i-1]) + y.append( 0.5*(fish['y'][i]+fish['y'][i+2])) + h.append(0.5*(fish['h'][i]+fish['h'][i+2])) + # #print('added at x = ', fish['x'][i] + fishisi[i]) + retur[cl] = {'x':x,'y':y,'h':h} + lost[cl] = {'xlost':x_lost,'ylost':y_lost,'hlost':h_lost} + # filledpeaks =np.array(filledpeaks) + # #print(filledpeaks.shape) + # filledpeaks. + return retur, lost + +def calc_tsh_noise(peaks, data): + heights = np.vectorize(lambda peak: peak.height)(peaks) + # peakx = xarray(peaks) + # peakxlist = peakx.tolist() + # #print('datenstdanfang: ', np.std(data)) + # datatsh = np.mean(np.abs(data))# + # datatsh = 2* np.std(data) + # peakareas = [i for x in peakx for i in range(x-10, x+10) if (i < len(data))] + # peakareas = np.arange(peakx-10, peakx+10, 1) + # relevantdata = [] + #peakareas = np.unique(peakareas) + # #print(len(peakareas), len(data), ' len peakarea and data' , datatsh) + #relevantdata is the data without the areas around the peaks, to calculate the standard deviation of the noise + #c = 0 + tsh = 0.1*np.std(heights) + + #for i, dat in enumerate(data): + # if peakareas[c] == i and c dist: + # dist = tdist + #print('dist', dist) + if dist>=0: + valid = True + if olddatalen > 0: + alignlabels(labels, peaks, olddatalen) + for i, p in enumerate(peaklist): + pcclasses[peaknum] = labels[i] + return valid + if takekm: + km = KMeans(n_clusters=3, n_init = 3, init = 'random', tol=1e-5, random_state=170, verbose = True).fit(X) + core_samples_mask = np.zeros_like(km.labels_, dtype=bool) + labels = km.labels_ + if takekm: + for i, p in enumerate(peaklist): + # print('label ', labels[i]) + pcclasses[peaknum] = p.pccl + # Number of clusters in labels, ignoring noise if present. + n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) + #print('Estimated number of clusters: %d' % n_clusters_) + # ############################################################################# + # Plot result + # Black removed and is used for noise instead. 
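+ # The plotting below gives every cluster label its own colour from the Spectral
+ # colormap; points labelled -1 (DBSCAN noise) are drawn in black, and the cluster
+ # count computed above already excludes that noise label
+ # (e.g. labels = [0, 0, 1, -1, 1] -> n_clusters_ = 2, with one noise point).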
+ unique_labels = set(labels) + colors = [plt.cm.Spectral(each) + for each in np.linspace(0, 1, len(unique_labels))] + fig = plt.figure() + ax = fig.add_subplot(111, projection = '3d') + for k, col in zip(unique_labels, colors): + if k == -1: + # Black used for noise. + col = [0, 0, 0, 1] + class_member_mask = (labels == k) + xy = X[class_member_mask] + # print(col) + ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), + markeredgecolor='k', markersize=14) + ax.set_title('Estimated number of clusters: %d' % n_clusters_) + #plt.show() + + + from sklearn.neighbors import kneighbors_graph + knn_graph = kneighbors_graph(X, 15, include_self=False) + ac = AgglomerativeClustering(linkage = 'complete', n_clusters = 3, connectivity = knn_graph).fit(X) + core_samples_mask = np.zeros_like(ac.labels_, dtype=bool) + labels = ac.labels_ + if takekm: + for i, p in enumerate(peaklist): + print('label ', labels[i]) + pcclasses[peaknum] = labels[i] + # Number of clusters in labels, ignoring noise if present. + n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) + #print('Estimated number of clusters: %d' % n_clusters_) + # ############################################################################# + # Plot result + # Black removed and is used for noise instead. + unique_labels = set(labels) + colors = [plt.cm.Spectral(each) + for each in np.linspace(0, 1, len(unique_labels))] + fig = plt.figure() + ax = fig.add_subplot(111, projection = '3d') + for k, col in zip(unique_labels, colors): + if k == -1: + # Black used for noise. + col = [0, 0, 0, 1] + class_member_mask = (labels == k) + xy = X[class_member_mask] + print(col) + ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), + markeredgecolor='k', markersize=14) + ax.set_title('Estimated number of clusters: %d' % n_clusters_) + #plt.show() + +def ampwalkclassify3_refactor(peaks,peaklist): # final classificator + classamount = peaklist.classamount + lastofclass = peaklist.lastofclass # dict of a lists of the last few heightvalues of a class, f.E ((1,[0.7,0.68,0.71]), (5, [0.2, 0.21, 0.21])) + lastofclassx = peaklist.lastofclassx # dict of a list of the last few x-values of a class + a=0 + elem = 0 + thresholder = [] + comperr = 1 + classesnearby = peaklist.classesnearby # list of the classes of the last n peaks f.E:[1,2,1,2,1,3,2,1,...] + classesnearbyx = peaklist.classesnearbyx # list of the x-values of the last n peaks, f.E:[13300, 13460, 13587, 13690, 13701, ...] 
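+ # The same-class test applied further below is multiplicative: a peak of height h
+ # is accepted into a class with mean height m when |log2(h) - log2(m)| < log2(factor),
+ # i.e. when m/factor < h < m*factor. Hedged illustration with made-up numbers and
+ # the factor of 1.6 used below:
+ #   np.abs(np.log2(1.4) - np.log2(1.0)) < np.log2(1.6)   # True  -> height 1.4 fits a class of mean height 1.0
+ #   np.abs(np.log2(2.0) - np.log2(1.0)) < np.log2(1.6)   # False -> height 2.0 is rejected by the height criterion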
+ classesnearbypccl = peaklist.classesnearbypccl # list of the pc-classified classes of the last n peaks + classes = np.zeros((len(peaks[0]))) + if len(peaks) >3: + pcclasses = peaks[3] + print('ERROOR') + positions = peaks[0] + heights = peaks[1] + + # #print('nearbyclasses at start:' ,classesnearby, classesnearbyx) + # for peak in peaks: + # peak.cl = peak.pccl+2 + # peaklist.classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) + # return peaks + cl = 0 + maxdistance = 30000 # Max distance to possibly belong to the same class + factor = 1.6 # factor by which a peak fits into a class, f.E: classheight = 1 , factor = 2 => peaks accepted in range (0.5,2) + c=0 + peakamount = len(peaks.T) + #fish = ProgressFish(total = peakamount) + for peaknum, p in enumerate(peaks.T): + perc = str((peaknum*100)//peakamount) + # fish.animate(amount = "", dexextra = 'Partstatus: '+ ' '*(3-len(perc)) +perc + ' % (' + ' '*(4-len(str(peaknum)))+str(peaknum) + '/' + ' ' *(4-len(str(peakamount)))+str(peakamount) + '), Filestatus:') + awc_btime = [] + if len(lastofclass) == 0: # Dict with all classes, containing the heights of the last few peaks + lastofclass[1] = deque() + lastofclassx[1]= deque() + lastofclass[1].append(heights[peaknum]) + lastofclassx[1].append(positions[peaknum]) + classesnearby.append(1) + classesnearbyx.append(-1) + classesnearbypccl.append(pcclasses[peaknum]) + classes[peaknum] = 1 + classamount += 1 + continue + time1 = time.time() + for i, cl in enumerate(classesnearby): + if (positions[peaknum]-classesnearbyx[i]) > maxdistance: + classesnearby.pop(i) + classesnearbyx.pop(i) + classesnearbypccl.pop(i) + lastofclassisis = [] + for i in classesnearby: + # print(i, classesnearby) + lastofclassisis.append(np.median(np.diff(lastofclassx[i]))) + meanisi = np.mean(lastofclassisis) + if 32000 > 20*meanisi> 6000: + maxdistance = 20*meanisi + #print(meanisi, maxdistance , 'maxdistance ----------------------------------------------------------------------------------------------') + time2 = time.time() + awc_btime.append(time2-time1) #0 + cl = 0 # 'No class' + comperr = 1 + ##print('classesnearby at a peak', classesnearby) + clnrby = np.unique(classesnearby) + time1 = time.time() +# classmean = 0 + # if pcclasses[peaknum] == -1: + # factor = 1.2 + # else: + # factor = 1.6 + for i in clnrby: + #print('cl: ', i) + # if classesnearbypccl[classesnearby.index(i)] == -1: + # factor = 2.2 + # else: factor = 1.6 + classmean = np.mean(lastofclass[i]) + logerror = np.abs(np.log2(heights[peaknum])-np.log2(classmean)) + abserror = np.abs(heights[peaknum]-classmean) + logthresh = np.log2(factor) + #ä#print(np.std(lastofclass[i])) absthresh = 0.5*classmean # #print('test log', np.abs(np.log2(np.array([0.4,0.5,1,1.5,2,2.4]))-np.log2(np.array([1,1,1,1,1,1]))) ) # abs(classmean*0.5) + #relerror = error + relerror = logerror + #relabserror = abserror/thresh + # if 1140 < p.num < 1150: + # print(p.num) + # print('for classes at one peak: classmean, height, abserror, thresh', + # classmean,heights[peaknum], logerror, logthresh) + #print(len(classesnearbypccl), len(classesnearby)) + #print(classmean, heights[peaknum], logerror, logthresh, pcclasses[peaknum], classesnearbypccl[classesnearby.index(i)]) + if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum] or pcclasses[peaknum] == -1:# or + if logerror < logthresh: ## SameClass-Condition + if relerror < comperr and (positions[peaknum]-classesnearbyx[classesnearby.index(i)]) 2*compareisierror: +# cl = holdlastcl + + time2 = 
time.time() + awc_btime.append(time2-time1) #1 + time1 = time.time() + if pcclasses[peaknum] != -1: + if cl != 0 : + #print(cl) + if len(lastofclass[cl]) >= 3: + lastofclass[cl].popleft() + if len(lastofclassx[cl]) >= 3: + lastofclassx[cl].popleft() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + else: # Add new class + cl = classamount+1 + #print('existingclasses: ', classamount) + classamount = cl + #print('newclass: ----------------------------------------------------------------', cl) + lastofclass[cl] = deque() + lastofclassx[cl] = deque() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + ##print('tatsaechlich: ', cl) + if len(classesnearby) >= 12: #kacke implementiert? + minind = classesnearbyx.index(min(classesnearbyx)) + del lastofclass[classesnearby[minind]] + del lastofclassx[classesnearby[minind]] + #print(classesnearby[minind], 'del') + classesnearby.pop(minind) + classesnearbyx.pop(minind) + classesnearbypccl.pop(minind) + # for ind, clnrby in enumerate(reversed(classesnearby)): + # classesnearbyx + # del lastofclass[classesnearby[ind]] + # # del lastofclassx[classesnearby[minind]] + # classesnearby.pop(minind) + # classesnearbyx.pop(minind) + try: + ind=classesnearby.index(cl) + classesnearbyx[ind] = positions[peaknum] + # #print(ind ,' --------------------------------------here -----------------------------') + except ValueError: + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + else: + if cl != 0: + classes[peaknum] = cl + else: + cl = classamount+1 + #print('existingclasses: ', classamount) + classamount = cl + #print('newclass: ', cl) + lastofclass[cl] = deque() + lastofclassx[cl] = deque() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + if len(classesnearby) >= 12: #kacke implementiert? 
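+ # At most 12 candidate classes are kept in the nearby-bookkeeping; once that limit
+ # is reached, the class whose last peak lies furthest back (smallest stored
+ # x-position) is removed from lastofclass/lastofclassx and from the classesnearby
+ # lists below.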
+ minind = classesnearbyx.index(min(classesnearbyx)) + del lastofclass[classesnearby[minind]] + del lastofclassx[classesnearby[minind]] + #print(classesnearby[minind], 'del') + classesnearby.pop(minind) + classesnearbyx.pop(minind) + classesnearbypccl.pop(minind) + # for ind, clnrby in enumerate(reversed(classesnearby)): + # classesnearbyx + # del lastofclass[classesnearby[ind]] + # # del lastofclassx[classesnearby[minind]] + # classesnearby.pop(minind) + # classesnearbyx.pop(minind) + try: + ind=classesnearby.index(cl) + classesnearbyx[ind] = positions[peaknum] + # #print(ind ,' --------------------------------------here -----------------------------') + except ValueError: + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + time2 = time.time() + # awc_btime.append(time2-time1) #2 + # classesnearby = [cls for cls in classesnearby if cls != False] + # classesnearbyx = [clx for clx in classesnearbyx if clx != False] + # + # + #print('awc_btime ', awc_btime , ' newpeak-------------------------------------------------------- :') + peaklist.lastofclass = lastofclass + peaklist.lastofclassx = lastofclassx + peaklist.classesnearby = classesnearby + peaklist.classesnearbyx = classesnearbyx + peaklist.classlist = classes # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) + peaklist.classamount = classamount + peaks = np.append(peaks,classes[None,:], axis = 0) + return peaks, peaklist + +def joincc(peaklist,peaks): + # connects classes that appear after each other... + # peaklist = peaks.list + joinedsome = False + classlist = peaks[4] + peaksofclass = {} + last = [] + connect = {} #connect classes in connect+ + classcount = dict.fromkeys(classlist, 0) + ##print(classcount) + #classcount = [0]*len(np.unique(classlist)) + # #print(np.unique(classlist)) + for cl in np.unique(classlist): + peaksofclass[cl]= peaks[:,classlist == cl] + for i in range(len(peaks[0])): # i is the increasing index of the peaks + p = peaks[:,i] + poc = peaksofclass[p[4]] + classcount[p[4]]+=1 + countclass = p[4] #the current class before it might be changed to the connected class + if p[4] in connect: + p[4] = connect[p[4]] #peakclass is changed to connected class + # #print('changed ', countclass, 'to', p.cl) + joinedsome = True + + if len(poc) == classcount[countclass]: #the current peak is last peak of its class + last = poc[-len(poc) if len(poc) <= 5 else 5:] #the last peaks of the class + # #print('last: ', last) + #mean_last = np.mean(np.vectorize(lambda peak: peak[2])(last)) + mean_last = np.mean(last[2,:]) + nextfirst = {} # the first peaks of the next coming class(es) + # #print('class: ', countclass, 'at x = ', p.x, 'mean_last: ', mean_last) + for nexti in range(20): # the next 10 peaks are considered if they belong to the same classe + if i + nexti >= len(peaks[0]): break + inextp = peaks[:,i+nexti] + if classcount[inextp[4]] == 0: #current peak is first peak of its class + # #print('found a new begin! 
its class:' , inextp.cl) + ponc = peaksofclass[inextp[4]] # + nextfirst[inextp[4]] = ponc[0:len(ponc) if len(ponc) <= 5 else 5] + # #print(np.mean(np.vectorize(lambda peak: peak.height)(nextfirst[inextp.cl]))) + # #print(nextfirst) + compare = 1 + c = 0 + nextclass = -1 + for nextcl, first in nextfirst.items(): + mean_nextfirst = np.mean(first[2,:])#np.mean(np.vectorize(lambda peak: peak.height)(first)) + # #print(mean_nextfirst) + error = abs(mean_nextfirst - mean_last)/(mean_nextfirst) + if error < 1: + if compare < error: + continue + compare = error + if nextcl in connect: #if the peak that ist considered belongs to a class, that is already supposed to be connected to the current class + pocc = peaksofclass[connect[nextcl]] #peaks of the currently supposed connected class + if ( abs(mean_nextfirst - np.mean(pocc[-len(pocc) if -len(pocc) <= 5 else 5:][2])) + < abs(mean_nextfirst - mean_last) ): + continue + nextclass = nextcl + if nextclass != -1: + connect[nextclass] = p[4] + # #print('connect ', p.cl , ' and ', nextcl) + for cl in peaklist.classesnearby: + if cl in connect: + # #print('cl, connect', cl, connect[cl]) + peaklist.classesnearby[peaklist.classesnearby.index(cl)] = connect[cl] + peaklist.lastofclass[connect[cl]]=peaklist.lastofclass[cl] + peaklist.lastofclassx[connect[cl]]= peaklist.lastofclassx[cl] + peaklist.classlist = peaks[4] + return joinedsome + # for poc in peaksofclass: + # if len(poc) >= 3: + # newlast = poc[-3:] + # first = poc[:3] + # else: + # newlast = poc[-len(poc):] + # first = poc[:len(poc)] + # if last != []: + # if abs(np.mean(first) - np.mean(last)) < 0: + # #print('oh') + +def discardwaves_refactor(peaks, data): + deleteclasses = [] + for cl in np.unique(peaks[3]): + peaksofclass = peaks[:,peaks[3] == cl] + isi = np.diff(peaksofclass[0]) + isi_mean = np.mean(isi) + # #print('isismean',isi_mean) + widepeaks = 0 + # #print('width',peaksofclass[2].width) + isi_tenth_area = lambda x, isi:np.arange(np.floor(x-0.1*isi),np.ceil(x+0.1*isi),1, dtype = np.int) + for p in peaksofclass.T: + data = np.array(data) + try: + for dp_around in data[isi_tenth_area(p[0],isi_mean)]:#np.floor(p[0]-0.1*isi_mean), np.ceil(p[0]+0.1*isi_mean),1)]:# + if dp_around <= p[1]-p[2]: + break + except IndexError: + pass + else: + widepeaks+=1 + ## p.isreal_pleateaupeaks() + if widepeaks > len(peaksofclass)*0.5: + deleteclasses.append(cl) + for cl in deleteclasses: + peaks = peaks[:,peaks[3]!=cl] + return peaks + +def smallclassdiscard(peaks, mincl): + classlist = peaks[3] + smallclasses = [cl for cl in np.unique(classlist) if len(classlist[classlist + == cl]) < + mincl] + delete = np.zeros(len(classlist)) + for cl in smallclasses: + delete[classlist == cl] == 1 + peaks = peaks[:,delete != 1] + return peaks + +def makepeak(data_x,cutsize, maxwidth, peakx, ltr, data_ltr, rtr, data_rtr, num, minhlr): + #if len(data) > peakx + cutsize/2: + return Peak(peakx, data_x, maketr(data_ltr, ltr), maketr(data_rtr, rtr), maxwidth, num, minhlr)#data[peakx-cutsize/2:peakx+cutsize/2], num) + #else: + # return Peak(peakx, data[peakx], + # maketr(data, ltr), + # maketr(data, rtr), + # maxwidth, + # #data[peakx-cutsize/2:-1], + # num) + +def maketr(data_x, x): + if x is not None: + return Tr(x,data_x) + else: + return None + +def makepeaklist(pkfirst, data, pk, tr, cutsize, maxwidth): + peaklist = np.empty([len(pk)], dtype = Peak) + trtopk = pkfirst + pktotr = 1-pkfirst + trlen = len(tr) + pklen = len(pk) + minhlr = lambda i, mwl, mwr : min( + abs( data[pk[i]] - min( data[pk[i]-mwl:pk[i]] ) if 
len(data[pk[i]-mwl:pk[i]]) > 0 else 0 ) + , + abs( data[pk[i]]- min( + data[pk[i]:pk[i]+mwr] ) if len(data[pk[i]:pk[i]+mwr]) > 0 else 0 ) + ) + #print(min( data[pk[0]-0:pk[2]]) ) + + if pktotr == 0: + peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], None, None, tr[pktotr], data[pktotr], 0, minhlr(0, 0, maxwidth)) + else: + peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], + tr[-trtopk], + data[-trtopk], tr[pktotr], data[pktotr], + 0, minhlr(0, min(maxwidth, + pk[0]-tr[-trtopk]) , maxwidth)) + for i in range(1,pklen-1): + peaklist[i] = makepeak(data[pk[i]], cutsize, maxwidth, pk[i], tr[i-trtopk], data[tr[i-trtopk]], tr[i+pktotr],data[tr[i+pktotr]], i, minhlr(i, maxwidth, maxwidth)) + if pktotr == 0 and pklen <= trlen: + peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1], data[pklen-trtopk-1], tr[pklen+pktotr-1], data[pklen+pktotr-1], i, minhlr(pklen-1, maxwidth, min(maxwidth, tr[pklen+pktotr-1]-pk[pklen-1]))) + else: + peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1],data[pklen-trtopk-1], None, None, pklen-1, minhlr(pklen-1, maxwidth, 0)) + return peaklist + +#def doublepeaks(peaks, peakwidth): +# dif2 = peaks[1].x-peaks[0].x +# if dif2 > 5* peakwidth: +# peaks[0].real = False +# for i in range(1,len(peaks)-1): +# dif1 = dif2 +# dif2 = peaks[i+1].x-peaks[i].x +# if dif1 > 5* peakwidth and dif2 > 5* peakwidth: +# peaks[i].real = False +# if dif2 > 5* peakwidth: +# peaks[len(peaks)-1] = False +# return peaks + +def discardunrealpeaks(peaklist): + peaks = peaklist[:][np.vectorize(lambda peak: peak.real, otypes=[object])(peaklist) == True] + for i, p in enumerate(peaks): + pass + # p.num = i + return peaks + +def discardnearbypeaks(peaks, peakwidth): + peaksx = xarray(peaks) + pkdiff = np.diff(peaksx) + # peakwidth = avg_peakwidth(pknum,tr) + pknumdel= np.empty(len(peaksx)) + pknumdel.fill(False) +# peaksy = yarray(peaks) + peaksh = heightarray(peaks) + for i,diff in enumerate(pkdiff): + # #print(peaks[i].height) + if diff < peakwidth: #* peaks[i].height: ### Trial Error + if peaksh[i+1] > 1.01 *peaksh[i] : + pknumdel[i] = True + else: + # print(peaksh[i],peaksh[i+1]) + pknumdel[i+1] = True + peaks = peaks[pknumdel!=True] + for i, p in enumerate(peaks): + p.num = i + return peaks + +def interpol(data, kind): + #kind = 'linear' , 'cubic' + width = len(data) + x = np.linspace(0, width-1, num = width, endpoint = True) + return interp1d(x, data[0:width], kind , assume_sorted=True) + +def cutcenter(peak): + p = peak + cut = p.cut + pl=p.distancetoltr + pr=p.distancetortr + if pl is None: + pl = 10 + tx = p.x-10 + else: tx = p.ltr.x + if pr is None: + pr = 10 + if pl < p.maxwidth and pr > 1: + + width=len(cut) + # #print('distancetoltr',pl) + peakshape = cut + interpolfreq = 1 + xnew = np.linspace(0,len(peakshape)-1, len(peakshape)*interpolfreq, endpoint= True) + curvyf = interpol(peakshape) + curvy= curvyf(xnew) + #px = p.cutsize/2 * 4 + #left = px - (5*4) + #plt.plot(xnew, curvy) + #x_0 = optimize.fsolve(curvyf, 1.0) + # f = interp1d(x, y) + # f2 = interp1d(range(width), data[x:x+width], kind='cubic') + ##xnew = np.linspace(0, width-1, num = width*4, endpoint = True) + ##print(xnew) + # plt.plot(xnew,f2(xnew)) + ##print("show") + #plt.show + trx = (p.cutsize/2 - (p.x - tx) ) + if trx >0 : + xstart = trx + else: + xstart = 0 + # #print('pkx: ', p.x, 'ltrx: ', p.ltr.x) + # #print('trx in intpol', x) + x = xstart + if curvyf(x) < 0: + left = 0 + right= 0 + while(x < width-1 and curvyf(x) < 
0) : + left = x + # #print(curvyf(x)) + x+=0.25 + right = x + # #print('x: ', x , 'left, right: ', curvyf(left), curvyf(right)) + x = left+(1-curvyf(right)/(curvyf(right)-curvyf(left)))*1/interpolfreq + # #print(x) + else: + x = 0 + # #print(x_int) + # plt.scatter(xstart, curvyf(xstart), marker = 'x', s=150, zorder=2, linewidth=2, color='red') + # plt.scatter(x, curvyf(x), marker='x', s=150, zorder=2, linewidth=2, color='black') + # plt.show + # #print(x_int) + #p.relcutcenter = (p.ltr.x + x_int)-p.x + ##print('cent',p.relcutcenter) + #return (p.ltr.x + x_int)-p.x + + # while(data[x]>0) + else: + x= 0 + + return x + +def relcutarray(peaks): + return np.vectorize(lambda peak: peak.relcutcenter)(peaks) + +def xarray(peaks): + if len(peaks)>0: + peakx = np.vectorize(lambda peak: peak.x)(peaks) + return peakx + else: return [] + +def yarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.y)(peaks) + else: return [] + +def heightarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.height)(peaks) + else: return [] + +def clarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.cl)(peaks) + else: return [] +def pcclarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.pccl)(peaks) + else: return [] + +def peakxarray( ): + peakx = np.empty([len]) + peakx = np.vectorize(lambda peak: peak.x)(peaks) + return peakx + +def peakyarray( ): + peaky= np.empty([len]) + return np.vectorize(lambda peak: peak.y)(peaks) + + +def classify( ): + #template = peaks[0] + meanfit = np.mean(np.vectorize(fit, otypes=[object])(template,peaks)) + for p in peaks: + if fit(template,p) < meanfit: + # #print('classified ', fit(template,p) , ' meanfit: ' , meanfit) + p.currentclass = 1 + +def classifyhiker(template, peaks): + meanfit = np.mean(np.vectorize(fitinterpol2, otypes=[object])(template,peaks)) + #toclassify = peaks.tolist() + firstnot = 0 + for c in range(1,5): + first = True + template = peaks[firstnot] + for i, p in enumerate(peaks[firstnot:]): + if p.currentclass == 0: + if fitinterpol2(template,p) < meanfit: + # #print('peak number ' , i, 'classified as ', c, fit(template,p) , ' meanfit: ' , meanfit) + p.currentclass = c + template = p + elif first == True: + # #print('peak number ' , i, 'classified as First! 
', c, fit(template,p) , ' meanfit: ' , meanfit) + firstnot = i + first = False + else: + None + ##print('peak number ' , i, 'classified as not classified!', fit(template,p) , ' meanfit: ' , meanfit) + return peaks + + + # def Templatefitnext( , number, templnum): + # for p in peaks: + # if fit(peaks[templnum], p) < fitparameter: + +def cut_snippets(data, peaklist, rnge): + snippets = [] + positions = xarray(peaklist) + heights = heightarray(peaklist) + for pos in positions: + snippets.append(data[(pos+rnge[0]):(pos+rnge[1])]) + scaledsnips = np.empty_like(snippets) + for i, snip in enumerate(snippets): + top = -rnge[0] + # plt.plot(snip) + scaledsnips[i] = snip * 1/heights[i] + #plt.plot(scaledsnips[i]) + # print('plted') +# plt.show() + #print('1') + alignedsnips = np.empty((len(snippets), (rnge[1]-rnge[0])*10-30-10)) + standardized = np.empty((len(snippets), (rnge[1]-rnge[0])*10-10)) + intfact = 10 + for i, snip in enumerate(scaledsnips): + if len(snip) < ((rnge[1]-rnge[0])): + if i == 0: + snip =np.concatenate([np.zeros([((rnge[1]-rnge[0]) - len(snip))]),np.array(snip)]) + if i == len(scaledsnips): + snip = np.concatenate([snip, np.zeros([((rnge[1]-rnge[0])-len(snip))])]) + else: + # print('this') + snip = np.zeros([(rnge[1]-rnge[0])]) + interpoled_snip = dta.interpol(snip, 'cubic')(np.arange(0, len(snip)-1, 1/intfact)) if len(snip) > 0 else np.zeros([(rnge[1]-rnge[0]-1)*intfact ]) #interpolfactor 10 + + intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) + if intsnipheight == 0: + intsnipheight = 1 + interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight + standardized[i] = interpoled_snip + #print('2') + mean = np.mean(standardized, axis = 0) + #plt.plot(mean) +# plt.show() + #plt.plot(mean[10*-rnge[0]-10*5:-10*rnge[1]+21]) +# plt.show() + meantop = np.argmax(mean) + for i, snip in enumerate(standardized): + #plt.show() + interpoled_snip = snip #standardized[i] + cc = dta.crosscorrelation(interpoled_snip[15:-15], mean) + #cc = crosscorrelation(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31], mean[10*-rnge[0]-10*7:-10*rnge[1]+31]) + #plt.plot(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31]) + #top = np.argmax(interpoled_snip) + #offset = meantop - top + #if not(-15 <= offset <= 15): offset = 0 + offset = -15 + np.argmax(cc) + interpoled_snip = interpoled_snip[15-offset:-15-offset] if offset != -15 else interpoled_snip[30:] + #print(offset) + #plt.plot(interpoled_snip) + if len(interpoled_snip[~np.isnan(interpoled_snip)])>0: + alignedsnips[i] = interpoled_snip + #plt.show() + # print('3') + return snippets, alignedsnips + + +#def alignclusterlabels(labels, peaklist, peaks, olddatalen): +# overlapamount = len(peaks[:,peaks[0]<30000]) +# if overlapamount == 0: +# return None +# overlappeaks = copy.deepcopy(peaks[:overlapamount]) +# if len(peaks) > 3: +# print('wieso hat peaks eine pcclklasse?') +# overlappeaks = np.append(overlappeaks,[labels], axis = 0) +# overlap_peaklist = connect_blocks(old_peaklist) +# overlap_peaklist.classesnearbypccl = [-1]*len(overlap_peaklist.classesnearbypccl) +# classified_overlap = dta.ampwalkclassify3_refactor(overlappeaks,overlap_peaklist) +# +# labeltranslator = {} +# for cl in np.unique(classified_overlap[3]): +# if len(labeltranslator) <= len(np.unique(labels)): +# labelindex = np.where(classified_overlap[3] == cl)[0] +# label = labels[labelindex] +# labelindex = labelindex[np.where(label == stats.mode(label)[0])[0][0]] +# newlabel = labels[labelindex] +# try: +# oldlabel = 
old_peaklist.classesnearbypccl[::-1][old_peaklist.classesnearby[::-1].index(cl)] +# except: +# oldlabel = -2 +# try: +# labeltranslator[oldlabel] +# except KeyError: +# labeltranslator[oldlabel] = newlabel +# for lbl in peaks.classesnearbypccl: +# try: labeltranslator[lbl] +# except KeyError: labeltranslator[lbl] = lbl +# old_peaklist.classesnearbypccl = [labeltranslator[lbl] for lbl in old_peaklist.classesnearbypccl] +## print(labeltranslator) + +def fit(templ, peak): + fit = np.sum(np.square(templ.cut - peak.cut)) + return fit + +def fitinterpol2(templ,peak): + t = templ + p = peak + if p.real and t.real: + fit = np.sum(np.square(t.cutaligned-p.cutaligned)) + else: + fit = 0 + return fit + + + +def fitinterpol( templ, peak): + t = templ + p = peak + if p.real: + centerp = cutcenter(p) + centert = cutcenter(t) + shiftp = centerp-p.cutsize/2 + shiftt = centert-t.cutsize/2 + + if shiftp > -5: + shiftp = min(5, 5+centerp-p.cutsize/2) + else: shiftp = 0 + + if shiftt > -5: + shiftt = min(5, 5+centert-t.cutsize/2) + else: shiftt = 0 + + xnew = np.linspace(0,p.cutsize-11, (p.cutsize-1) * 4,endpoint = True) + #peak_interpoled = interpol(p.cut)(xnew) + #plt.plot(xnew, interpol(p.cut)(xnew+shift)) + # #print(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp)) + fit = np.sum(np.square(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp))) + else: + fit = 0 + return fit + + +def plotdata(peaks, data): + x = xarray(peaks) + y = yarray(peaks) + plt.plot(range(len(data)),data) + plt.plot(x, y, '.r', ms=20) + #for p in peaks: + # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) + # plt.plot(tr, data[tr], '.g', ms=20) + plt.show() + + +def plotdatabyx(peaksx, data): + x = peaksx + y = data[peaksx] + plt.plot(range(len(data)),data) + plt.plot(x, y, '.r', ms=20) + plt.show() + #for p in peaks: + # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) + # plt.plot(tr, data[tr], '.g', ms=20) + +def plotpeak(peaks): + #plt.plot(peaks), cutpeaks) #bei betrachtung aller blocks zu groß! 
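+ # (the commented-out call above would plot all peaks at once, which gets too large
+ # when looking at all blocks; instead each peak's cut-out snippet p.cut is plotted
+ # over its sample index below)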
+ for p in peaks: + plt.plot(range(p.cutsize),p.cut) + #plt.plot(pk, x[pk] , '.r', ms=20) + plt.show() + + +def periodicinclass(peaks, cl): + noiselist = [] + classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaks) + peaks = xarray(peaks) + peaks = peaks[:][classlist == cl] + periodic = [] + periodiccollector = [] + error2 = [] + isperiodic = True + b=1 + c=2 + ctofar = False + compdif = 0 + dif = 0 + count = 1 + foundtriple = False + next = 0 + for i in range(len(peaks)-1): + if i != next: continue + # #print(i, 'foundtriple', foundtriple) + error2 = [] + b=1 + c=0 + A = peaks[i] + B = peaks[i+b] + compdif = dif + while foundtriple == True and count <= 3 and i+1 < len(peaks)-1: + while B-A < compdif*1.5 and i+b+1 < len(peaks)-1: + # #print('newdif: ', B-A, 'olddif:' , dif) + if abs((B-A) - compdif) < compdif*0.4: + error2.append(abs((B-A) - dif)) + b+=1 + B = peaks[i+b] + if len(error2) > 0: + bestB = error2.index(min(error2)) + B = peaks[i+1 + bestB] + periodic.append(B) + dif = 0.5*(dif + (B-A)) + # #print('match found') + b = 1+bestB + break + else: + count+=1 + compdif = dif*count + else: + if foundtriple == True: + # #print('no further match found, ') + isperiodic = False + + + + + while foundtriple == False and i+c< len(peaks)-1: + while i+c < len(peaks)-1: + A = peaks[i] + B = peaks[i+b] + C = peaks[i+c] + dif1 = B - A + dif2 = C - B + if (C-B > (B-A)*1.5): + break + if abs(dif1 - dif2) < dif1*0.4: + error2.append(abs(dif1-dif2)) + c +=1 + #C = peaks[i+c] # C weiterlaufenlassen, bis zu weit + else: + if len(error2) == 0: + # #print('no triple found') + isperiodic = False + if len(error2) > 0: + bestC = error2.index(min(error2)) + C = peaks[i+2 + bestC] + c = 2+ bestC + periodic.extend((A,B,C)) + dif1 = B - A + dif2 = C - B + # #print('dif1: ', dif1, 'dif2: ', dif2) + dif = 0.5*(dif2+dif1) + foundtriple = True + # #print('triple found', i+c, 'dif : ', dif) + else: + error2 = [] # B weiterlaufen lassen, C reset auf B+1 + b +=1 + c = b+1 + + if isperiodic == False: + if len(periodic) > 3: + periodiccollector.append(periodic) + isperiodic = True + periodic = [] + if c!=0: + next = i+c + else: + next = i+b + if len(periodiccollector) > 0: + # for i in range(len(periodiccollector)): + # #print('collector ', i, periodiccollector[i]) + return periodiccollector + else: + #print('no periodicity found') + return [] + + + +def noisediscard(peaklist, tsh_n, ultimate_threshold): + detected_noise = False + ##print('noisetsh: ', tsh_n) + for p in peaklist.list: + + if p.height < tsh_n or p.height < ultimate_threshold: + p.noise = True + detected_noise = True + peaklist.list = peaklist.list[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaklist.list) == False] + # #print(peaks) + # for cl in classlist: + # diff = np.vectorize(lambda peak: peak.x, otypes=[object])(peaks[:][classlist == cl]) + # meandiff = np.mean(diff) + # msecompare = np.mean(np.square(diff-(diff*0.8))) + # mse = np.mean(np.square(diff-meandiff)) + # if mse > msecompare: + # noiselist.append(cl) + # for p in peaks: + #if p.cl in noiselist: + # if p.height < 0.1: + # p.noise = True + # peaks = peaks[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaks) == False] + # return peaks + return detected_noise + + +def plotPCclasses_ref(peaks, data): + plt.plot(range(len(data)),data, color = 'black') + print(peaks) + classlist = np.array(peaks[3],dtype = 'int') + cmap = plt.get_cmap('jet') + colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) + np.random.seed(22) + 
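+ # the 3000 colormap entries are shuffled with a fixed seed, presumably so that
+ # consecutive class ids still get clearly distinct yet reproducible colours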
np.random.shuffle(colors) + colors = [colors[cl] for cl in np.unique(classlist)] + print('classlist', np.unique(classlist)) + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) + # x=0 +# if len(classlist)>0: + # #print(classlist) + # #print('classes: ' , np.unique(classlist)) + #from collections import Counter + #count = Counter(classlist) + # #print('longest class: ', count.most_common()[0]) + for num, color in zip(np.unique(classlist), colors): + if num == -1 : + color = 'black' + peaksofclass = peaks[:,classlist == num] + print(num) + plt.plot(peaksofclass[0], peaksofclass[1], '.', color = color, ms =20) + #plt.scatter(peaks[0], peaks[2]) + # for p in peaks: + # plt.text(p.x, p.y, p.num) + #plt.show() + + print('show pcclasses') + plt.show() + plt.close() + +def plotampwalkclasses_refactored(peaks, data): + plt.plot(range(len(data)),data, color = 'black') + classlist = np.array(peaks[3],dtype=np.int) + cmap = plt.get_cmap('jet') + colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) + np.random.seed(22) + np.random.shuffle(colors) + colors = [colors[cl] for cl in np.unique(classlist)] + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) + # x=0 +# if len(classlist)>0: + # #print(classlist) + # #print('classes: ' , np.unique(classlist)) + #from collections import Counter + #count = Counter(classlist) + # #print('longest class: ', count.most_common()[0]) + for cl, color in zip(np.unique(classlist), colors): + peaksofclass = peaks[:,classlist == cl] + #xpred = linreg_pattern(peaksofclass[0:3]) + #for p in peaksofclass[0:3]: + # #print(p.x) + ##print(xpred, peaksofclass[3].x) + + #if len(peaksofclass) > 1000: + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) + #else: + + plt.plot(peaksofclass[0],peaksofclass[1], '.', color = color, ms =20) + plt.scatter(peaksofclass[0], peaksofclass[2]) + # for p in peaks: + # plt.text(p.x, p.y, p.num) + plt.show() + + # plt.show() + plt.close() + + +def crosscorrelation(sig, data): + autocorr = signal.fftconvolve(data, sig[::-1], mode='valid') + return autocorr + +def plottemplatefits(data, peaks, tr, templnum): + # + plotdata(peaks, data, tr) + plt.plot(range(len(data)),data) + classes = np.vectorize(lambda peak: peak.currentclass, otypes=[object])(peaks) + class1 = peaks[:][classes == 1 ] + if len(class1) > 0: + plt.plot(xarray(class1), yarray(class1), '.r', ms=20) + class2 = peaks[:][classes == 2 ] + if len(class2) > 0: + plt.plot(xarray(class2), yarray(class2), '.g', ms=20) + class3 = peaks[:][classes == 3 ] + if len(class3) > 0: + plt.plot(xarray(class3), yarray(class3), '.c', ms=20) + class4 = peaks[:][classes == 4 ] + if len(class4) > 0: + plt.plot(xarray(class4), yarray(class4), '.y', ms=20) + + # for p in peaks: # <-- + # plt.text(p.x , p.y, p.num) + + # plt.plot(tr, data[tr], '.g', ms=20) + plt.show() + +def linreg_pattern(peaks): + from sklearn import datasets, linear_model + from sklearn.metrics import mean_squared_error, r2_score + + peaksx = xarray(peaks) + peaksx = peaksx.reshape(-1,1) + #peaksh = heightarray(peaks) + #peakx = peak.x + # Create linear regression object + regr = linear_model.LinearRegression() + numbers = np.arange(len(peaks)).reshape(-1,1) + # Train the model using the training sets + regr.fit(numbers, peaksx) + + # Make predictions using the testing set + peakx_pred = regr.predict(len(peaks)) + # # The coefficients + # #print('Coefficients: \n', regr.coef_) + # # The mean squared error + # #print("Mean squared error: %.2f" + # % 
mean_squared_error(diabetes_y_test, diabetes_y_pred)) + # # Explained variance score: 1 is perfect prediction + # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) + + + # Plot outputs + #plt.scatter(peaksx, peaksh, color='black') + #plt.scatter(peakx, peakh_pred, color='blue') + + #plt.xticks(()) + #plt.yticks(()) + + # plt.show() + + return peakx_pred + +def linreg(peaks, peak): + from sklearn import datasets, linear_model + from sklearn.metrics import mean_squared_error, r2_score + + peaksx = xarray(peaks) + peaksx = peaksx.reshape(-1,1) + peaksh = heightarray(peaks) + peakx = peak.x + # Create linear regression object + regr = linear_model.LinearRegression() + + # Train the model using the training sets + regr.fit(peaksx, peaksh) + + # Make predictions using the testing set + peakh_pred = regr.predict(peakx) + + # # The coefficients + # #print('Coefficients: \n', regr.coef_) + # # The mean squared error + # #print("Mean squared error: %.2f" + # % mean_squared_error(diabetes_y_test, diabetes_y_pred)) + # # Explained variance score: 1 is perfect prediction + # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) + + + # Plot outputs + #plt.scatter(peaksx, peaksh, color='black') + #plt.scatter(peakx, peakh_pred, color='blue') + + #plt.xticks(()) + #plt.yticks(()) + + # plt.show() + + + + return peakh_pred + +def wp_transform(x): + import pywt + wp = pywt.WaveletPacket(data=x, wavelet='haar', mode='symmetric') + print('maxlevel: ', wp[''].maxlevel) + return (np.array([node.data for node in wp.get_level(wp[''].maxlevel, 'freq')])).flatten() + +def wpfeats(snips): + size = len(wp_transform(snips[0])) + wp = np.empty([len(snips), size]) + for i, snip in enumerate(snips): + print(wp_transform(snip)) + wp[i] = (wp_transform(snip)) + #wp = wp.T + print(wp[0]) + wpcoef = wp.T + print(wp[0]) + from sklearn.preprocessing import StandardScaler + wpcoef = StandardScaler().fit_transform(wpcoef) + coeffvalues = [] + for coeff in wpcoef: + stat, crit, sig = stats.anderson(coeff, dist = 'norm') + # coeffvalues.append(stat) + coeffvalues.append(np.sum(np.abs(coeff))) + coeffvalues = np.array(coeffvalues) + coeffs = np.argsort(coeffvalues)[::-1][:10] + print(coeffvalues[coeffs]) + return wp.T[coeffs] + + + + +def pc(cutsnippets, peaklist): + # (observations, features) matrix + M = np.empty([len(cutsnippets), len(cutsnippets[0])]) + for i, snip in enumerate(cutsnippets): + M[i] = snip[:] + from sklearn.preprocessing import StandardScaler + StandardScaler().fit_transform(M) + # #print(M.shape, ' Mshape') + # singular value decomposition factorises your data matrix such that: + # + # M = U*S*V.T (where '*' is matrix multiplication) + # + # * U and V are the singular matrices, containing orthogonal vectors of + # unit length in their rows and columns respectively. + # + # * S is a diagonal matrix containing the singular values of M - these + # values squared divided by the number of observations will give the + # variance explained by each PC. + # + # * if M is considered to be an (observations, features) matrix, the PCs + # themselves would correspond to the rows of S^(1/2)*V.T. if M is + # (features, observations) then the PCs would be the columns of + # U*S^(1/2). + # + # * since U and V both contain orthonormal vectors, U*V.T is equivalent + # to a whitened version of M. + + U, s, Vt = np.linalg.svd(M, full_matrices=False) + V = Vt.T + + # PCs are already sorted by descending order + # of the singular values (i.e. 
by the + # proportion of total variance they explain) + S = np.diag(s) + # PC = (s*V) + # PCs: + #print(U.shape) + #print(S.shape) + #print(V.shape) + #print(s[0], U[0,:]) + + #PC1 = (s[0] * U[:,0]) + #PC2 = (s[1] * U[:,1]) + #for i, p in enumerate(peaklist): + # p.pc1 = PC1[i] + # p.pc2 = PC2[i] + + #mu = peaks.mean(axis=0) + #fig, ax = plt.subplots() + #ax.scatter(xData, yData) + #for axis in U: + # start, end = mu, mu + sigma * axis + # ax.annotate( + # '', xy=end, xycoords='data', + # xytext=start, textcoords='data', + # arrowprops=dict(facecolor='red', width=2.0)) + #ax.set_aspect('equal') + #plt.show() + + + # if plot_steps: + # plt.scatter(PC1, PC2) + # plt.show() + + # PCData1 = (U[:,0]*M) + # PCData2 = (U[:,1]*M) + # plt.scatter(PCData1, PCData2) + # plt.show() + + #plt.scatter(U[:,0],U[:,1]) + #plt.show() + #print('done') + #return PC + + # if we use all of the PCs we can reconstruct the noisy signal perfectly + #Mhat = np.dot(U, np.dot(S, V.T)) + #print('Using all PCs, MSE = %.6G' %(np.mean((M - Mhat)**2))) + + #plt.show() + return S@U.T + +def gettime(x, samplerate, starttime): + startm = int(starttime[-2:]) + starth = int(starttime[:-2]) + seconds = x/samplerate + m, s = divmod(seconds, 60) + m = m + startm + h, m = divmod(m, 60) + h = h+starth + return "%d:%02d:%02d" % (h, m, s) + +#def connect_blocks(oldblock): +# newblock = Peaklist([]) +# newblock.lastofclass = oldblock.lastofclass +# newblock.lastofclassx = oldblock.lastofclassx +# newblock.classesnearby = oldblock.classesnearby +# newblock.classesnearbypccl = oldblock.classesnearbypccl +# newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx] +# return newblock +# ##print('classesnearbyx! old, new ' , oldblock_len,oldblock.classesnearbyx , newblock.classesnearbyx) + +if __name__ == '__main__': + main() + + + +# deleted Code, but unsure if really want to delete: + + #nix #print( b.data_arrays) + + # for cl in np.unique(cllist): + + # currentfish_x = x[:][cllist == cl] + # currentfish_y = y[:][cllist == cl] + # currentfish_h = x[:][cllist == cl] + + + #nix try: + #nix xpositions[cl] = b.create_data_array("f%d_eods" %cl, "spiketimes", data = currentfish_x) + #nix xpositions[cl].append_set_dimension() + #nix # thisfish_eods = b.create_multi_tag("f%d_eods_x"%cl, "eods.position", xpositions[cl]) + #nix # thisfish_eods.references.append(nixdata) + #nix except nix.pycore.exceptions.exceptions.DuplicateName: + #nix + #nix xpositions[cl].append(currentfish_x) + + + #thisfish_eods.create_feature(y, nix.LinkType.Indexed) + #b.create_multi_tag("f%d_eods_y"%cl, "eods.y", positions = y) + #b.create_multi_tag("f%d_eods_h"%cl, "eods.amplitude", positions = h) + #thisfish_eods.create_feature + + + + +# in analyseEods +# in analyseEods classlist = eods[3] #np.vectorize(lambda peak: peak.cl, otypes=[object])(worldpeaks.list) +# in analyseEods fishclass = {} +# in analyseEods #print('classlist: ', classlist) +# in analyseEods # #print('Classes at end: ', np.unique(classlist)) +# in analyseEods +# in analyseEods +# in analyseEods fishes = {} +# in analyseEods for num in np.unique(classlist): +# in analyseEods fishes[num] = eods[:,:][: , classlist == num] +# in analyseEods +# in analyseEods +# in analyseEods +# in analyseEods +# in analyseEods fishes = fill_hidden_3(fishes) # cl-dict : x y z -dict +# in analyseEods #maxlencl = max(fishes, key=lambda k: fishes[k]['x'][-1]-fishes[k]['x'][0]) +# in analyseEods +# in analyseEods fishes, weirdparts = fill_holes(fishes) +# in analyseEods fishes, weirdparts = 
fill_holes(fishes) +# in analyseEods +# in analyseEods for cl in np.unique(classlist): +# in analyseEods isi = [isi for isi in np.diff(fishes[cl]['x'])] +# in analyseEods fishes[cl][3]= isi +# in analyseEods + + +#npFish +#npFish npFishes = {} +#npFish fishfeaturecount = len(fishes[cl]) +#npFish for cl in np.unique(classlist): +#npFish npFishes[cl]= np.zeros([fishfeaturecount, len(fishes[cl]['x'])]) +#npFish for i, feature in enumerate(['x', 'y', 'h', 'isi']): #enumerate(fishes[cl]): +#npFish if feature == 'isi': +#npFish fishes[cl][feature].append(fishes[cl][feature][-1]) +#npFish # #print(feature, cl) +#npFish npFishes[cl][i] = np.array(fishes[cl][feature]) +#npFish # #print(npFishes[classlist[0]][0]) +#npFish # #print(npFishes[classlist[0]][2]) +#npFish # #print(npFishes[classlist[0]][3]) +#npFish #np.savetxt('worldpeaks_x_y_cl_2', (x,y,cl, isi), fmt="%s") +#npFish +#npFish np.set_printoptions(threshold=np.nan) +#npFish +#npFish for i, cl in enumerate(np.unique(classlist)): #Neue Klassennamen! +#npFish x = npFishes[cl][0] +#npFish y = npFishes[cl][1] +#npFish h = npFishes[cl][2] +#npFish isi = npFishes[cl][3] +#npFish +#npFish np.savetxt(filename[:-4]+'Fish_xyhisi_cl%d' % i, npFishes[cl], fmt="%s") +#npFish +#npFish +#npFish + + + + + + # / TODO: Peakclassifikator bei weit wegliegenden klassen? Done + # / TODO: Class2 implementation auf class linreg übertragen Done - Doof + # TODO: Klassen zusammenfuegen/ Noise zusammenfuegen + # - Wenn last 3 und first 3 zueinander passen in 1. Amplitude und 2. Periode (falls peaks) oder 2. randomzeugs? - Noiseerkennung und 2. Amplitude + # TODO: Klassen filtern auf Patternausreißer + # diff --git a/thunderfish/DextersThunderfishAddition/analyseDexThinned.py b/thunderfish/DextersThunderfishAddition/analyseDexThinned.py new file mode 100644 index 00000000..aa131ad7 --- /dev/null +++ b/thunderfish/DextersThunderfishAddition/analyseDexThinned.py @@ -0,0 +1,2262 @@ +# Script to detect and classify EODs in recordings of weakly electric pulse +# fish, Dexter Früh, 2018 +# +# it is suggested to save the recording in +# workingdirectory/recording/recording.WAV + +# results will be saved in workingdirectory/recording/ +# +# input: +# - [Recorded Timeseries] recording.WAV +# outputs(optional): +# - [Detected and Classified EODs] +# (Numpy Array with Shape (Number of EODs, 4 (Attributes of EODs)), +# with the EOD-Attributes +# - x-location of the EOD +# (time/x-coordinate/datapoint in recording) +# - y-location of the EOD +# (Amplitude of the positive peak of the pulse-EOD) +# - height of the EOD(largest distance between peak and through in the EOD) +# - class of the EOD +# eods_recording.npy +# - [plots of the results of each analyse step for each +# analysepart (timeinterval of length = deltat) of the recording] +# +# required command line arguments at function call +# - save : if True, save the results to a numpy file (possibly +# overwrite existing) +# - plot : if True, plot results in each analysestep +# - new : if True, do a new analysis of the recording, even if there +# is an existing analyzed .npy file with the right name. 
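+# (for example, the hypothetical call
+#    python3 analyseDexThinned.py recording.WAV 1 0 1 10 70
+#  would save the results, skip the per-step plots, force a new analysis and
+#  restrict it to the interval from second 10 to second 70 of recording.WAV)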
+# +# call with: +# python3 scriptname.py save plot new (starttime endtime[sec] for only +# partial analysis) +# +# other parameters are behind imports and some hardcoded at the relevant +# codestep +import sys +import numpy as np +import copy +from scipy.stats import gmean +from scipy import stats +from scipy import signal +from scipy import optimize +import matplotlib +from fish import ProgressFish +import matplotlib.pyplot as plt +from thunderfish.dataloader import open_data +from thunderfish.peakdetection import detect_peaks +from scipy.interpolate import interp1d +from scipy.signal import savgol_filter +from collections import deque +import ntpath +import nixio as nix +import time +import os +from shutil import copy2 +from ownDataStructures import Peak, Tr, Peaklist + +from IPython import embed +# parameters for the analysis + +deltat = 30.0 # seconds of buffer size +thresh = 0.04 # minimal threshold for peakdetection +peakwidth = 20 # width of a peak and minimal distance between two EODs + +# basic parameters for thunderfish.dataloader.open_data +verbose = 0 +channel = 0 + +# timeinterval to analyze other than the whole recording +#starttime = 0 +#endtime = 0 +#timegiven = False + +def main(): # analyse_dex.py filename save plot new (optional starttime endtime [sec]) + # home = os.path.expanduser('~') + # os.chdir(home) + # defaults for optional arguments + timegiven = False + plot_steps = False + + # parse command line arguments - filepath, save, plot, new (, starttime, + # endtime) + filepath = sys.argv[1] + #thresh = 0.05 + save = int(sys.argv[2]) + plot_steps = int(sys.argv[3]) + new = int(sys.argv[4]) + if len(sys.argv[:])>5: + timegiven = True + starttime = int(sys.argv[5]) + endtime = int(sys.argv[6]) + #print(starttime, endtime) + # plot_steps = 1 + peaks = np.array([]) + troughs = np.array([]) + cutsize = 20 + maxwidth = 50 #10 + ultimate_threshold = thresh+0.01 + filename = path_leaf(filepath) + + ### ## ask user before overwriting + # if save == 1: + # proceed = input('Really want to save data and possibly overwrite existing? 
[y/n]').lower() + # if proceed == 'n': + # quit() + # elif proceed == 'y': + # printcat file | while read line + # do + #do something + # done('continuing') + # elif proceed != 'y': + # quit() + datasavepath = filename[:-4] + print(datasavepath) + eods_len = 0 + + ### ## starting analysis if it is wished or the analyzed EODs-file is not available in the working directory + if new == 1 or not os.path.exists(filename[:-4]+"/eods5_"+filename[:-3]+"npy"): + + ### ## import data + with open_data(filepath, channel, deltat, 0.0, verbose) as data: + + if save == 1 or save == 0: + # datasavepath = filename[:-4]+"/"+filename + if not os.path.exists(datasavepath): + os.makedirs(datasavepath) + copy2(filepath, datasavepath) + samplerate = data.samplerate + + ### ## split datalength into smaller blocks + nblock = int(deltat*data.samplerate) + if timegiven == True: + #print(starttime, samplerate) + parttime1 = starttime*samplerate + # parttime1 = samplerate * 10270 + parttime2 = endtime*samplerate + data = data[parttime1:parttime2] + if len(data)%nblock != 0: + blockamount = len(data)//nblock + 1 + else: + blockamount = len(data)//nblock + bigblock = [] + + ### ## output first (0%) progress bar + print('blockamount: ' , blockamount) + progress = 0 + print(progress, '%' , end = " ", flush = True) + fish = ProgressFish(total = blockamount) + olddatalen = 0 + startblock = 0 + ## iterating through the blocks, detecting peaks in each block + for idx in range(startblock, blockamount): + + ### ## print progress + if progress < (idx*100 //blockamount): + #print(progress, '%' , end = " ", flush = True) + progress = (idx*100)//blockamount + # print('.' , end = '') + progressstr = 'Partstatus: '+ str(0) + ' '*2 + ' % (' + '0' + ' '*4+ '/' + '?'+' '*4+ '), Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + progressstr = 'Partstatus: '+ 'Part ' + '0'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + + ### ## take debugging times, not used right now + time1 = time.time() + #print('took ', time1-time0, 's') + time0 = time1 + + # time measurement of parts of the algorithm to find time + # efficiency bottlenecks + bottletime = [] + bottletime.append(time.time()) #0 + + datx = data[idx*nblock:(idx+1)*nblock] + + ### ## smoothing of the timeseries and calculating autocorrelation - not used + #from scipy.signal import butter, lfilter + #datx = savgol_filter(datx, 11, 7) + #fs = samplerate # 1 ns -> 1 GHz + #cutoff = samplerate/10 # 10 MHz + #B, A = butter(5, cutoff / (fs / 3), btype='low') # 1st order Butterworth low-pass + #datx = lfilter(B, A, datx, axis=0) + #plt.plot(datx) + #plt.show() + #sig = data[-320000:-1] + #autocorr = signal.fftconvolve(sig, sig, mode='full') + #plt.plot(autocorr) + #plt.show() + #f, Pxx_den = signal.periodogram(sig, samplerate) + #plt.plot(Pxx_den) + #plt.show() + #x = savgol_filter(x, 11, 7) + + # ---------- analysis ----------- + # step1: detect peaks in timeseries + pk, tr = detect_peaks(datx, thresh) + troughs = tr + bottletime.append(time.time()) #1 + # continue with analysis only if multiple peaks are detected + if len(pk) > 2: + def makepeaklist_refactor(pk,tr,data): + ### ## create 'peaks' with x,y and height and discard peaks that seem to be no EODs based on their width and simple features like - no minimum close to the maximum. 
+ # decide whether a peak or a through is detected first + pkfirst = int((min(pk[0],tr[0])= 0 and right_tr_ind < len(tr): + # ltr_x = tr[left_tr_ind] + # ltr_y = datx[ltr_x] + # rtr_x = tr[right_tr_ind] + # rtr_y = datx[rtr_x] + if min((pk_x - ltr_x),(rtr_x -pk_x)) > peakwidth: + pk_r[...] = False + elif max((pk_x - ltr_x),(rtr_x -pk_x)) <= peakwidth: + pk_h[...] = pk_y - min(ltr_y, rtr_y) + else: + if (pk_x-ltr_x)<(rtr_x-pk_x): + pk_h[...] = pk_y-ltr_y + else: + pk_h[...] = pk_y -rtr_y + elif left_tr_ind == -1: + if rtr_x-pk_x > peakwidth: + pk_r[...] = False + else: + pk_h[...] = pk_y- rtr_y + elif right_tr_ind == len(tr): + if pk_x-ltr_x > peakwidth: + pk_r[...] = False + else: + pk_h[...] = pk_y-ltr_y + peaks = np.array([peaks_x, peaks_y, peaks_h], dtype = np.float)[:,peaks_real!=0] + return peaks + peaks = makepeaklist_refactor(pk,tr,datx) + #plt.plot(data[0:32000]) + #for ik in peaks.list[0:400]: + # plt.scatter(i.x, i.height) + #plt.show() + bottletime.append(time.time()) #2 + def discardnearbypeaks_refactor(peaks, peakwidth): + ### ## discard peaks that are close to each other, as a EOD mostly has more than one maximum and only one of the maxima is considered to be the EOD/EODlocation + unchanged = False + while unchanged == False: + x_diffs = np.diff(peaks[0]) + peaks_heights = peaks[2] + peaks_delete = np.zeros(len(peaks[0])) + for i, diff in enumerate(x_diffs): + if diff < peakwidth: + if peaks_heights[i+1] > peaks_heights[i] : + peaks_delete[i] = 1 + else: + peaks_delete[i+1] = 1 + peaks = peaks[:,peaks_delete!=1] + if np.count_nonzero(peaks_delete)==0: + unchanged = True + return peaks + peaks = discardnearbypeaks_refactor(peaks,peakwidth) +# plt.plot(datx) +# plt.scatter(peaks[0],peaks[1]) +# plt.show() +# ### ## tries to calculate the noiselevel in the current recording part. Might actually not do anything at all, because the ultimate_threshold might be larger eitherway. some recordings have some exploitable data below this threshold, but most don't. And the rate of errors just gets too big for such small peaks. 
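# The nearby-peak step above repeatedly merges peaks that lie closer together than
# peakwidth, keeping the higher of each pair, until nothing changes. A minimal,
# self-contained sketch of that idea on a toy (3, n) array of x/y/height values
# (hypothetical numbers, not the recording):
import numpy as np

def discard_nearby(peaks, peakwidth):
    """Repeatedly drop the lower of any two peaks closer than peakwidth."""
    changed = True
    while changed:
        x, h = peaks[0], peaks[2]
        keep = np.ones(peaks.shape[1], dtype=bool)
        for i, d in enumerate(np.diff(x)):
            if d < peakwidth:
                keep[i if h[i + 1] > h[i] else i + 1] = False
        changed = not keep.all()
        peaks = peaks[:, keep]
    return peaks

toy = np.array([[0.0, 5.0, 100.0],     # x positions
                [0.1, 0.2, 0.15],      # y values
                [1.0, 2.0, 1.5]])      # heights
print(discard_nearby(toy, 20))         # the lower peak at x=0 is dropped, x=5 and x=100 stay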
+# if len(peaks.list) > 2: +# tsh_n = calc_tsh_noise(peaks.list, datx) + bottletime.append(time.time()) #5 + # if len(peaks.list) > 2: + # noisediscard(peaks, ultimate_threshold, ultimate_threshold) + bottletime.append(time.time()) #6 + progressstr = 'Partstatus: '+ 'Part ' + '1'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + if len(peaks) > 0: + bottletime.append(time.time()) #7 + ### ## connects the current part with the one that came before, to allow for a continuous analysis + if idx >= startblock+1: + peaklist = connect_blocks(peaklist) + else: + peaklist = Peaklist([]) + bottletime.append(time.time()) #8 + #print('\n ') + #print('cut_snips, with ' ,len(peaks.list), 'peaks') + # cuts snippets from the data time series around the peaks, interpolates them and aligns them + def cut_snippets_refactor(data, peaks, rnge): + snippets = [] + positions = np.array(peaks[0],dtype=np.int) + heights = peaks[2] + intfact = 10 + alignrange = 1.5 + alignwidth = int(np.ceil(alignrange * intfact) ) + for pos in positions: + snippets.append(data[(pos+rnge[0]):(pos+rnge[1])]) + scaled_snips = np.empty_like(snippets) + for i, snip in enumerate(snippets): + top = -rnge[0] + #plt.plot(snip) + scaled_snips[i] = snip * 1/heights[i] + #plt.plot(scaledsnips[i]) + #plt.show() + aligned_snips = np.empty((len(snippets), (rnge[1]-rnge[0])* + intfact-(2*alignwidth)-intfact)) + ipoled_snips = np.empty((len(snippets), (rnge[1]-rnge[0])*intfact-intfact)) + + for i, snip in enumerate(scaled_snips): + if len(snip) < ((rnge[1]-rnge[0])): + if i == 0: + snip = np.concatenate([np.zeros([((rnge[1]-rnge[0]) - len(snip))]),np.array(snip)]) + if i == len(scaledsnips): + snip = np.concatenate([snip, np.zeros([((rnge[1]-rnge[0])-len(snip))])]) + else: + snip = np.zeros([(rnge[1]-rnge[0])]) + interpolation = interpol(snip, 'cubic') #if len(snip) > 0 else np.zeros([(rnge[1]-rnge[0]-1)*intfact ]) + interpoled_snip = interpolation(np.arange(0, len(snip)-1, 1/intfact)) + intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) + if intsnipheight == 0: + intsnipheight = 1 + interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight + ipoled_snips[i] = interpoled_snip + + mean = np.mean(ipoled_snips, axis = 0) + meantop = np.argmax(mean) + #plt.plot(mean) + #plt.show() + #plt.plot(mean[10*-rnge[0]-10*5:-10*rnge[1]+21]) + #plt.show() + for i, interpoled_snip in enumerate(ipoled_snips): + cc = crosscorrelation(interpoled_snip[alignwidth:-alignwidth], mean) + #cc = crosscorrelation(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31], mean[10*-rnge[0]-10*7:-10*rnge[1]+31]) + offset = -15 + np.argmax(cc) + interpoled_snip = interpoled_snip[15-offset:-15-offset] if offset != -15 else interpoled_snip[30:] + #plt.plot(interpoled_snip) + if len(interpoled_snip[~np.isnan(interpoled_snip)])>0: + aligned_snips[i] = interpoled_snip + #plt.show() + return snippets, aligned_snips + snips, aligned_snips = cut_snippets_refactor(datx,peaks, [-15,15]) + # snips, scaledsnips = cut_snippets(datx, peaks.list, [-15,15]) + #wpf = wpfeats(scaledsnips) + #print(wpf[0]) + #print('pc') + progressstr = 'Partstatus: '+ 'Part ' + '2'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + #print('len ', len(scaledsnips)) + #print(scaledsnips) + def pc_refactor(cutsnippets): + # (observations, features) matrix + M = np.empty([len(cutsnippets), len(cutsnippets[0])]) + for i, snip in enumerate(cutsnippets): + M[i] = snip[:] + from sklearn.preprocessing import StandardScaler + from 
sklearn.decomposition import PCA + #StandardScaler().fit_transform(M) + pca = PCA() + pc_comp= pca.fit_transform(M) + return pc_comp + # calculates principal components + pcs = pc_refactor(aligned_snips) + #print('dbscan') + + # clusters the features(principal components) using dbscan algorithm. clusterclasses are saved into the peak-object as Peak.pccl + order = 5 + minpeaks = 3 if deltat < 2 else 10 + def dbscan_refactor(pcs, peaks, order, eps, min_samples, takekm, olddatalen): + # pcs (samples, features) + # X (samples, features) + from sklearn.cluster import DBSCAN + from sklearn import metrics + from mpl_toolkits.mplot3d import Axes3D + from sklearn.cluster import AgglomerativeClustering + try: + X = pcs[:,:order] + except: + X = pcs[:,order] + # ############################################################################# + # Compute DBSCAN + db = DBSCAN(eps, min_samples).fit(X) + from sklearn.cluster import KMeans + core_samples_mask = np.zeros_like(db.labels_, dtype=bool) + core_samples_mask[db.core_sample_indices_] = True + labels = db.labels_ ##### TODO ###### --- irgendwo Indexfehler oder so, last change - pcs richtige DImension + #peaks = np.array([np.append(peaks[:,i],labels[i]) for i in range(len(peaks[0]))]) + peaks = np.append(peaks,[labels], axis = 0) + return peaks + + peaks = dbscan_refactor(pcs, peaks, order, 0.4, minpeaks, False, olddatalen) + + #plotPCclasses(peaks.list, datx) + olddatalen = len(datx) + num = 1 + #classlist = np.vectorize(lambda peak: peak.pccl, otypes=[object])(peaks.list) + #snips, scaledsnips = cut_snippets(datx, peaks.list[classlist == num], [-15,5]) + #pcs2 = pc(scaledsnips, peaks.list[classlist==num]) + #pcs2 = wpfeats(scaledsnips) + #dbscan(pcs2, peaks.list[classlist == num],4, 0.15, 15, False) + #print('Classify') + progressstr = 'Partstatus: '+ 'Part ' + '3'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + + # classifies the peaks using the data from the clustered classes and a simple amplitude-walk which classifies peaks as different classes if their amplitude is too far from any other classes' last three peaks + peaks, peaklist = ampwalkclassify3_refactor(peaks, peaklist) # classification by amplitude + # print(peaks.classlist) + print(peaks) + bottletime.append(time.time()) #9 + join_count=0 + while True and joincc(peaklist, peaks) == True and join_count < 200: + join_count += 1 + continue + # print(peaks.classlist) + bottletime.append(time.time()) #10 + + # discards all classes that contain less than mincl EODs + mincl = 6 # >=1 + peaks = smallclassdiscard(peaks, mincl) + bottletime.append(time.time()) #11 + + # discards peaks, that are too wide compared to their + # inter spike intervals and seem to be wavesfish signals + # actually... 
works in some cases + if len(peaks[0]) > 0: + peaks = discardwaves_refactor(peaks, datx) + + # plots the data part and its detected and classified peaks + if plot_steps == True: + plotampwalkclasses_refactored(peaks, datx) + bottletime.append(time.time()) #12 + + # map the analyzed EODs of the buffer part to the whole + # recording + worldpeaks = np.copy(peaks) + bottletime.append(time.time()) #13 + # change peaks location in the buffered part to the location relative to the + idx = 1 + # peaklocations relative to whole recording + worldpeaks[0] = worldpeaks[0] + (idx*nblock) + peaklist.len = idx*nblock +# for p in worldpeaks: +# = idx*nblock + p.x + bottletime.append(time.time()) #14 + bottletime.append(time.time()) #15 + # extract the relevant information from each peakobject of + # the buffered part and rearrange it as numpy array for + # computational efficienty + #x = xarray(thisblock) + #y = yarray(thisblock) + #h = heightarray(thisblock) + #cllist = clarray(thisblock) + #bottletime.append(time.time()) #16 + #thisblock_eods = np.array([x,y,h, cllist]) + #bottletime.append(time.time()) #17 + #bottletime.append(time.time()) #18 + #thisblockeods_len = len(thisblock_eods[0,:]) + thisblock_eods = np.delete(peaks,3,0) + thisblockeods_len = len(thisblock_eods[0]) + progressstr = 'Partstatus: '+ 'Part ' + '4'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + + # save the peaks of the current buffered part to a numpy-memmap on the disk + if thisblockeods_len> 0 and save == 1 or save == 0: + if idx == 0: + eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='w+', shape=(4,thisblockeods_len), order = 'F') + # fp = np.memmap(filepath[:len(filename)]+"eods_"+filename[:-3]+"npy", dtype='float32', mode='w+', shape=(4,len(thisblock_eods[0,:]))) + dtypesize = 8#4 #float32 is 32bit = >4< bytes long ---changed to float64 -> 8bit + eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', offset = dtypesize*eods_len*4, shape=(4,thisblockeods_len), order = 'F') + eods[:] = thisblock_eods + eods_len += thisblockeods_len + bottletime.append(time.time()) #19 + #classes.extend(np.unique(cllist)) + + # to clean the plt buffer... 
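# The saving step above streams each buffer's (4, n) EOD block into one float64
# memmap in Fortran order, re-opening the file with a byte offset of
# 8 * eods_len * 4 so every new block lands behind the data already written.
# A minimal sketch of that append pattern with a hypothetical file name and
# random stand-in blocks:
import numpy as np

mmp_path = "eods_demo.npmmp"   # hypothetical path, not the script's naming scheme
dtypesize = 8                  # bytes per float64 value
eods_len = 0                   # columns written so far

for blocknum in range(3):
    block = np.random.rand(4, 5)           # stands in for thisblock_eods of one buffer
    k = block.shape[1]
    if eods_len == 0:
        mm = np.memmap(mmp_path, dtype='float64', mode='w+', shape=(4, k), order='F')
    else:
        # skip the 4*eods_len float64 values already on disk (column-major layout)
        mm = np.memmap(mmp_path, dtype='float64', mode='r+',
                       offset=dtypesize * eods_len * 4, shape=(4, k), order='F')
    mm[:] = block
    mm.flush()
    eods_len += k

eods = np.memmap(mmp_path, dtype='float64', mode='r', shape=(4, eods_len), order='F')
print(eods.shape)              # (4, 15)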
+ plt.close() + + # get and print the measured times of the algorithm parts for the + # current buffer + bottletime.append(time.time())#20 + time_a= bottletime[0] + for i, times in enumerate(bottletime): + #print('times: ' ,i, times-time_a) + time_a=times + + progressstr = 'Partstatus: '+ 'Part ' + '5'+ '/''5'+' Filestatus:' + fish.animate(amount = idx, dexextra = progressstr) + # plt.show() + + # after the last buffered part has finished, save the memory mapped + # numpy file of the detected and classified EODs to a .npy file to the + # disk + eods = np.memmap(datasavepath+"/eods_"+filename[:-3]+"npmmp", dtype='float64', mode='r+', shape=(4,eods_len), order = 'F') + print('before final saving: print unique eodcl: ' , np.unique(eods[3])) + if save == 1: + # #print('eods', eods[3]) + path = filename[:-4]+"/" + if not os.path.exists(path): + os.makedirs(path) + if eods_len > 0: + print('Saved!') + np.save(filename[:-4]+"/eods8_"+filename[:-3]+"npy", eods) + else: + #np.save(filename[:-4]+"/eods5_"+filename[:-3]+"npy", thisblock_eods) + print('not saved') + + else: # if there already has been a certain existing result file and 'new' was set to False + print('already analyzed') + + + # not used data implementation using NIX + # Save Data + + # Needed: + # Meta: Starttime, Startdate, Length + # x, y, h, cl, difftonextinclass -> freq ? , + + # Later: Find "Nofish" + # Find "Twofish" + # Find "BadData" + # Find "Freqpeak" + # ? Find "Amppeak" + # + + # bigblock = np.array(bigblock) + # x=xarray(bigblock) + # y=yarray(bigblock) + # cl=clarray(bigblock) + + + #nix file = nix.File.open(file_name, nix.FileMode.ReadWrite) + #nix b = file.blocks[0] + #nix nixdata = b.data_arrays[0] + #nix cldata = [] + #nix #print(classes) + #nix #print(b.data_arrays) + #nix for i in range(len(np.unique(classes))): + #nix cldata.append(b.data_arrays[i+1]) + + + # for cl in + + # for cl in + # x = thisfish_eods + + + #nix file.close() + +def path_leaf(path): + ntpath.basename("a/b/c") + head, tail = ntpath.split(path) + return tail or ntpath.basename(head) + +def fill_hidden(fishclasses): + + fishes = fishclasses + + nohidefishes = {} + for cl in fishes: + x =[] + y = [] + h = [] + fish = fishes[cl] + # #print('fish', fish) + fishisi = calcisi(fish) + isi = fishisi[0] + for i, newisi in enumerate(fishisi): + leftpeak = fish[i] + x.append(leftpeak.x) + y.append(leftpeak.y) + h.append(leftpeak.height) + if newisi > 2.8*isi: + guessx = leftpeak.x + isi + + while guessx < leftpeak.x + newisi-0.8*isi: + + peakx = peakaround(guessx, isi*0.1, fishes) + if peakx is not None: + x.append(peakx) + y.append(leftpeak.y) + h.append(leftpeak.height) + guessx = peakx+ isi + (peakx-guessx) + + continue + break + isi = newisi + nohidefishes[cl]= {'x':x,'y':y,'h':h} + return nohidefishes + +def plotheights(peaklist): + heights = heightarray(peaklist) + x_locations = xarray(peaklist) + plt.scatter(x_locations, heights) + plt.show() + +def ploteods(eods, data): + plt.plot(range(len(data)),data, color = 'black') + classlist = eods[3] + cmap = plt.get_cmap('jet') + colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) + np.random.seed(22) + np.random.shuffle(colors) + colors = [colors[cl] for cl in np.unique(classlist)] + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) + x=0 + if len(classlist)>0: + # #print(classlist) + # #print('classes: ' , np.unique(classlist)) + from collections import Counter + count = Counter(classlist) + # #print('longest class: ', count.most_common()[0]) + for num, color in 
zip(np.unique(classlist), colors): + peaksofclass = eods[:,:][:, classlist == num] + #xpred = linreg_pattern(peaksofclass[0:3]) + #for p in peaksofclass[0:3]: + # #print(p.x) + ##print(xpred, peaksofclass[3].x) + + #if len(peaksofclass) > 1000: + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) + #else: + plt.plot(peaksofclass[0], peaksofclass[1], '.', color = color, ms =20) + plt.show() + +def fill_hidden_3(fishes): + + fishes = fishes + + nohidefishes = {} + for cl, fish in fishes.items(): + x =[] + y = [] + h = [] + # fish = fishes[cl] passt net, fishes is np.array mit (cl, (xyh)) + fishisi = np.diff(fish[0]) + isi = fishisi[0] + for i, newisi in enumerate(fishisi): + leftpeak = i + x.append(fish[0][i]) + y.append(fish[1][i]) + h.append(fish[2][i]) + # #print(cl, fish[0][i], isi, newisi) + if newisi > 2.8*isi: + guessx = fish[0][i] + isi + + while guessx < fish[0][i] + newisi-0.8*isi: + + peakx = peakaround3(guessx, isi*0.1, fishes) + if peakx is not None: + # #print(jup) + x.append(peakx) + y.append(fish[1][i]) + h.append(fish[2][i]) + guessx = peakx+ isi + (peakx-guessx) + + continue + break + isi = newisi + nohidefishes[cl]= {'x':x,'y':y,'h':h} + + return nohidefishes + +def peakaround2(guessx, interval, fishes): + found = False + for cl, fish in fishes.items(): + for px in fish['x']: + distold = interval + if px < guessx-interval: + continue + # #print('in area', guessx-interval) + if guessx-interval < px < guessx+interval: + found = True + dist = px-guessx + if abs(dist) < abs(distold): + distold = dist + if px > guessx+interval: + if found == True: + # #print(guessx, dist) + return guessx + dist + else: break + return None + +def peakaround3(guessx, interval, fishes): + found = False + for cl, fish in fishes.items(): + for px in fish[0]: + distold = interval + if px < guessx-interval: + continue + # #print('in area', guessx-interval) + if guessx-interval < px < guessx+interval: + found = True + dist = px-guessx + if abs(dist) < abs(distold): + distold = dist + if px > guessx+interval: + if found == True: + # #print(guessx, dist) + return guessx + dist + else: break + return None + +def peakaround(guessx, interval, fishes): + found = False + for cl, fish in fishes.items(): + for peak in fish: + + distold = interval + if peak.x < guessx-interval: + continue + # #print('in area') + if guessx-interval < peak.x < guessx+interval: + found = True + dist = peak.x-guessx + if abs(dist) < abs(distold): + distold = dist + if peak.x > guessx+interval: + if found == True: + # #print(guessx, dist) + return guessx + dist + else: break + return None + +def fill_holes(fishes): #returns peakx, peaky, peakheight # Fills holes that seem to be missed peaks in peakarray with fake (X/Y/height)-Peaks + retur = {} + lost = {} + for cl, fish in fishes.items(): + fishisi = np.diff(fish['x']) + mark = np.zeros_like(fishisi) + isi = 0 + ##print('mark', mark) + # #print('fishisi' , fishisi) + #find zigzag: + c=0 + c0= 0 + n=0 + for i, newisi in enumerate(fishisi): + if abs(newisi - isi)>0.15*isi: + if (newisi > isi) != (fishisi[i-1] > isi): + c+=1 + # #print(abs(newisi - isi), 'x = ', fish[i].x) + c0+=1 + elif c > 0: + n += 1 + if n == 6: + if c > 6: + # print ('zigzag x = ', fish['x'][i-6-c0], fish['x'][i-6]) + mark[i-6-c0:i-6]= -5 + c = 0 + c0=0 + n = 0 + + #if c > 0: + # #print(i, c) + # if c == 6: + # #print('zigzag!') + isi = newisi + isi = 0 + for i, newisi in enumerate(fishisi): + ##print('mark: ' , mark) + if mark[i] == -5: continue + if i+2 >= len(fishisi): + continue 
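# fill_hidden_3 and the peakaround helpers above guess the positions of missed EODs
# at multiples of the inter-spike interval and then check whether any detected peak
# of any class lies within a tolerance of the guess. A compact sketch of that lookup,
# assuming fishes maps class -> (x, y, h) arrays (toy values only):
import numpy as np

def peak_near(guessx, tolerance, fishes):
    """Return the detected peak position closest to guessx within +-tolerance, else None."""
    best = None
    for cl, (x, y, h) in fishes.items():
        x = np.asarray(x)
        near = x[np.abs(x - guessx) < tolerance]
        if near.size:
            cand = near[np.argmin(np.abs(near - guessx))]
            if best is None or abs(cand - guessx) < abs(best - guessx):
                best = cand
    return best

toy = {1: (np.array([100.0, 200.0, 300.0]), np.zeros(3), np.ones(3))}
print(peak_near(205.0, 10.0, toy))   # -> 200.0
print(peak_near(250.0, 10.0, toy))   # -> None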
+ if (2.2*isi > newisi > 1.8*isi) and (1.5*isi>fishisi[i+1] > 0.5*isi) : + mark[i] = 1 + isi = newisi + # #print('found 1!' , i) + elif (2.2*isi > newisi > 1.8*isi) and (2.2*isi> fishisi[i+1] > 1.8*isi) and (1.5*isi > fishisi[i+2] > 0.5*isi): + mark[i] = 1 + isi = isi + elif 3.4*isi > newisi > 2.6*isi and 1.5*isi > fishisi[i+1] > 0.5*isi: + mark[i] = 2 + + elif (0.6* isi > newisi > 0): + # #print('-1 found', i ) + if mark[i] ==0 and mark[i+1] ==0 and mark[i-1]==0 : + # isi = newisi + # continue + # #print('was not already set') + if fishisi[i-2] > isi < fishisi[i+1]: + mark[i] = -1 + # #print('-1') + elif isi > fishisi[i+1] < fishisi[i+2]: + mark[i+1] = -1 + # #print('-1') + isi = newisi + filldpeaks = [] + x = [] + y = [] + h = [] + x_lost=[] + y_lost=[] + h_lost=[] + # #print('filledmarks: ', mark) + for i, m in enumerate(mark): + if m == -1 : + # #print('-1 at x = ', fish['x'][i]) + continue + if m == -5: + x_lost.append(fish['x'][i]) + y_lost.append(fish['y'][i]) + h_lost.append(fish['h'][i]) + x.append(fish['x'][i]) + y.append(fish['y'][i]) + h.append(fish['h'][i]) + continue + x.append(fish['x'][i]) + y.append(fish['y'][i]) + h.append(fish['h'][i]) + if m == 1: + # #print('hofly added peak at x = ' , fish['x'][i]) + x.append(fish['x'][i] + fishisi[i-1]) + y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) + h.append(0.5*(fish['h'][i]+fish['h'][i+1])) + elif m== 2: + x.append(fish['x'][i] + fishisi[i]) + y.append( 0.5*(fish['y'][i]+fish['y'][i+1])) + h.append(0.5*(fish['h'][i]+fish['h'][i+2])) + x.append(fish['x'][i] + 2*fishisi[i-1]) + y.append( 0.5*(fish['y'][i]+fish['y'][i+2])) + h.append(0.5*(fish['h'][i]+fish['h'][i+2])) + # #print('added at x = ', fish['x'][i] + fishisi[i]) + retur[cl] = {'x':x,'y':y,'h':h} + lost[cl] = {'xlost':x_lost,'ylost':y_lost,'hlost':h_lost} + # filledpeaks =np.array(filledpeaks) + # #print(filledpeaks.shape) + # filledpeaks. + return retur, lost + +def calc_tsh_noise(peaks, data): + heights = np.vectorize(lambda peak: peak.height)(peaks) + # peakx = xarray(peaks) + # peakxlist = peakx.tolist() + # #print('datenstdanfang: ', np.std(data)) + # datatsh = np.mean(np.abs(data))# + # datatsh = 2* np.std(data) + # peakareas = [i for x in peakx for i in range(x-10, x+10) if (i < len(data))] + # peakareas = np.arange(peakx-10, peakx+10, 1) + # relevantdata = [] + #peakareas = np.unique(peakareas) + # #print(len(peakareas), len(data), ' len peakarea and data' , datatsh) + #relevantdata is the data without the areas around the peaks, to calculate the standard deviation of the noise + #c = 0 + tsh = 0.1*np.std(heights) + + #for i, dat in enumerate(data): + # if peakareas[c] == i and c dist: + # dist = tdist + #print('dist', dist) + if dist>=0: + valid = True + if olddatalen > 0: + alignlabels(labels, peaks, olddatalen) + for i, p in enumerate(peaklist): + pcclasses[peaknum] = labels[i] + return valid + if takekm: + km = KMeans(n_clusters=3, n_init = 3, init = 'random', tol=1e-5, random_state=170, verbose = True).fit(X) + core_samples_mask = np.zeros_like(km.labels_, dtype=bool) + labels = km.labels_ + if takekm: + for i, p in enumerate(peaklist): + # print('label ', labels[i]) + pcclasses[peaknum] = p.pccl + # Number of clusters in labels, ignoring noise if present. + n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) + #print('Estimated number of clusters: %d' % n_clusters_) + # ############################################################################# + # Plot result + # Black removed and is used for noise instead. 
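# The clustering step feeds the first few principal components of the aligned
# snippets into DBSCAN; label -1 marks noise, and the cluster count is the number
# of distinct labels without it. A minimal sketch with random stand-in snippets
# (hypothetical shapes and eps/min_samples values):
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN

rng = np.random.default_rng(0)
snippets = rng.normal(size=(200, 60))   # stands in for the scaled, aligned EOD snippets
order = 5                               # number of principal components used as features

pcs = PCA().fit_transform(snippets)     # (observations, components)
X = pcs[:, :order]

labels = DBSCAN(eps=0.4, min_samples=10).fit(X).labels_
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)   # ignore the noise label
print(n_clusters, np.unique(labels))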
+ unique_labels = set(labels) + colors = [plt.cm.Spectral(each) + for each in np.linspace(0, 1, len(unique_labels))] + fig = plt.figure() + ax = fig.add_subplot(111, projection = '3d') + for k, col in zip(unique_labels, colors): + if k == -1: + # Black used for noise. + col = [0, 0, 0, 1] + class_member_mask = (labels == k) + xy = X[class_member_mask] + # print(col) + ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), + markeredgecolor='k', markersize=14) + ax.set_title('Estimated number of clusters: %d' % n_clusters_) + #plt.show() + + + from sklearn.neighbors import kneighbors_graph + knn_graph = kneighbors_graph(X, 15, include_self=False) + ac = AgglomerativeClustering(linkage = 'complete', n_clusters = 3, connectivity = knn_graph).fit(X) + core_samples_mask = np.zeros_like(ac.labels_, dtype=bool) + labels = ac.labels_ + if takekm: + for i, p in enumerate(peaklist): + print('label ', labels[i]) + pcclasses[peaknum] = labels[i] + # Number of clusters in labels, ignoring noise if present. + n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) + #print('Estimated number of clusters: %d' % n_clusters_) + # ############################################################################# + # Plot result + # Black removed and is used for noise instead. + unique_labels = set(labels) + colors = [plt.cm.Spectral(each) + for each in np.linspace(0, 1, len(unique_labels))] + fig = plt.figure() + ax = fig.add_subplot(111, projection = '3d') + for k, col in zip(unique_labels, colors): + if k == -1: + # Black used for noise. + col = [0, 0, 0, 1] + class_member_mask = (labels == k) + xy = X[class_member_mask] + print(col) + ax.plot(xy[:, 0], xy[:, 1],xy[:,2], 'o', markerfacecolor=tuple(col), + markeredgecolor='k', markersize=14) + ax.set_title('Estimated number of clusters: %d' % n_clusters_) + #plt.show() + +def ampwalkclassify3_refactor(peaks,peaklist): # final classificator + classamount = peaklist.classamount + # for i in range(start, len(peaks)-start): + lastofclass = peaklist.lastofclass # dict of a lists of the last few heightvalues of a class, f.E ((1,[0.7,0.68,0.71]), (5, [0.2, 0.21, 0.21])) + lastofclassx = peaklist.lastofclassx # dict of a list of the last few x-values of a class + a=0 + elem = 0 + thresholder = [] + comperr = 1 + classesnearby = peaklist.classesnearby # list of the classes of the last n peaks (currently 12) f.E:[1,2,1,2,1,3,2,1,...] + classesnearbyx = peaklist.classesnearbyx # list of the x-values of the last n peaks, f.E:[13300, 13460, 13587, 13690, 13701, ...] 
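# ampwalkclassify3_refactor lets a peak join a nearby class only if its height is
# within a factor (here 1.6) of that class's recent mean height, compared on a
# log2 scale; otherwise a new class is opened. A stripped-down sketch of that rule
# on a plain list of heights (no x-distance, nearby-class or PC-class handling):
import numpy as np
from collections import deque

def amp_walk(heights, factor=1.6, memory=3):
    """Assign each height to the best class within log2(factor) of its mean, else start a new one."""
    logthresh = np.log2(factor)
    last = {}                          # class -> deque of its last `memory` heights
    classes = []
    for h in heights:
        best, besterr = None, logthresh
        for cl, hs in last.items():
            err = abs(np.log2(h) - np.log2(np.mean(hs)))
            if err < besterr:
                best, besterr = cl, err
        if best is None:
            best = len(last) + 1       # open a new class
            last[best] = deque(maxlen=memory)
        last[best].append(h)
        classes.append(best)
    return classes

# two interleaved pulse fish with clearly different EOD amplitudes
print(amp_walk([1.0, 0.3, 1.05, 0.31, 0.98, 0.29]))   # -> [1, 2, 1, 2, 1, 2]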
+ classesnearbypccl = peaklist.classesnearbypccl # list of the pc-classified classes of the last n peaks + classes = np.zeros((len(peaks[0]))) + pcclasses = peaks[3] + positions = peaks[0] + heights = peaks[1] + + # #print('nearbyclasses at start:' ,classesnearby, classesnearbyx) + # for peak in peaks: + # peak.cl = peak.pccl+2 + # peaklist.classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) + # return peaks + cl = 0 + maxdistance = 30000 # Max distance to possibly belong to the same class + factor = 1.6 # factor by which a peak fits into a class, f.E: classheight = 1 , factor = 2 => peaks accepted in range (0.5,2) + c=0 + peakamount = len(peaks.T) + #fish = ProgressFish(total = peakamount) + for peaknum, p in enumerate(peaks.T): + perc = str((peaknum*100)//peakamount) + # fish.animate(amount = "", dexextra = 'Partstatus: '+ ' '*(3-len(perc)) +perc + ' % (' + ' '*(4-len(str(peaknum)))+str(peaknum) + '/' + ' ' *(4-len(str(peakamount)))+str(peakamount) + '), Filestatus:') + awc_btime = [] + if len(lastofclass) == 0: # Dict with all classes, containing the heights of the last few peaks + lastofclass[1] = deque() + lastofclassx[1]= deque() + lastofclass[1].append(heights[peaknum]) + lastofclassx[1].append(positions[peaknum]) + classesnearby.append(1) + classesnearbyx.append(-1) + classesnearbypccl.append(pcclasses[peaknum]) + classes[peaknum] = 1 + classamount += 1 + continue + time1 = time.time() + for i, cl in enumerate(classesnearby): + if (positions[peaknum]-classesnearbyx[i]) > maxdistance: + classesnearby.pop(i) + classesnearbyx.pop(i) + classesnearbypccl.pop(i) + lastofclassisis = [] + for i in classesnearby: + # print(i, classesnearby) + lastofclassisis.append(np.median(np.diff(lastofclassx[i]))) + meanisi = np.mean(lastofclassisis) + if 32000 > 20*meanisi> 6000: + maxdistance = 20*meanisi + #print(meanisi, maxdistance , 'maxdistance ----------------------------------------------------------------------------------------------') + + time2 = time.time() + awc_btime.append(time2-time1) #0 + cl = 0 # 'No class' + comperr = 1 + ##print('classesnearby at a peak', classesnearby) + clnrby = np.unique(classesnearby) + time1 = time.time() +# classmean = 0 + # if pcclasses[peaknum] == -1: + # factor = 1.2 + # else: + # factor = 1.6 + + for i in clnrby: + #print('cl: ', i) + # if classesnearbypccl[classesnearby.index(i)] == -1: + # factor = 2.2 + # else: factor = 1.6 + classmean = np.mean(lastofclass[i]) + logerror = np.abs(np.log2(heights[peaknum])-np.log2(classmean)) + abserror = np.abs(heights[peaknum]-classmean) + logthresh = np.log2(factor) + #ä#print(np.std(lastofclass[i])) absthresh = 0.5*classmean # #print('test log', np.abs(np.log2(np.array([0.4,0.5,1,1.5,2,2.4]))-np.log2(np.array([1,1,1,1,1,1]))) ) # abs(classmean*0.5) + #relerror = error + relerror = logerror + relabserror = abserror/thresh + # if 1140 < p.num < 1150: + # print(p.num) + # print('for classes at one peak: classmean, height, abserror, thresh', + # classmean,heights[peaknum], logerror, logthresh) + #print(len(classesnearbypccl), len(classesnearby)) + #print(classmean, heights[peaknum], logerror, logthresh, pcclasses[peaknum], classesnearbypccl[classesnearby.index(i)]) + if classesnearbypccl[classesnearby.index(i)] == pcclasses[peaknum] or pcclasses[peaknum] == -1:# or + if logerror < logthresh: ## SameClass-Condition + if relerror < comperr and (positions[peaknum]-classesnearbyx[classesnearby.index(i)]) 2*compareisierror: +# cl = holdlastcl + + time2 = time.time() + awc_btime.append(time2-time1) 
#1 + time1 = time.time() + if pcclasses[peaknum] != -1: + if cl != 0 : + #print(cl) + if len(lastofclass[cl]) >= 3: + lastofclass[cl].popleft() + if len(lastofclassx[cl]) >= 3: + lastofclassx[cl].popleft() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + else: # Add new class + cl = classamount+1 + #print('existingclasses: ', classamount) + classamount = cl + + #print('newclass: ----------------------------------------------------------------', cl) + lastofclass[cl] = deque() + lastofclassx[cl] = deque() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + ##print('tatsaechlich: ', cl) + if len(classesnearby) >= 12: #kacke implementiert? + minind = classesnearbyx.index(min(classesnearbyx)) + del lastofclass[classesnearby[minind]] + del lastofclassx[classesnearby[minind]] + #print(classesnearby[minind], 'del') + classesnearby.pop(minind) + classesnearbyx.pop(minind) + classesnearbypccl.pop(minind) + # for ind, clnrby in enumerate(reversed(classesnearby)): + # classesnearbyx + # del lastofclass[classesnearby[ind]] + # # del lastofclassx[classesnearby[minind]] + # classesnearby.pop(minind) + # classesnearbyx.pop(minind) + try: + ind=classesnearby.index(cl) + classesnearbyx[ind] = positions[peaknum] + # #print(ind ,' --------------------------------------here -----------------------------') + except ValueError: + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + else: + if cl != 0: + classes[peaknum] = cl + else: + cl = classamount+1 + #print('existingclasses: ', classamount) + classamount = cl + #print('newclass: ', cl) + lastofclass[cl] = deque() + lastofclassx[cl] = deque() + lastofclass[cl].append(heights[peaknum]) + lastofclassx[cl].append(positions[peaknum]) + classes[peaknum] = cl + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + if len(classesnearby) >= 12: #kacke implementiert? 
+ minind = classesnearbyx.index(min(classesnearbyx)) + del lastofclass[classesnearby[minind]] + del lastofclassx[classesnearby[minind]] + #print(classesnearby[minind], 'del') + classesnearby.pop(minind) + classesnearbyx.pop(minind) + classesnearbypccl.pop(minind) + # for ind, clnrby in enumerate(reversed(classesnearby)): + # classesnearbyx + # del lastofclass[classesnearby[ind]] + # # del lastofclassx[classesnearby[minind]] + # classesnearby.pop(minind) + # classesnearbyx.pop(minind) + try: + ind=classesnearby.index(cl) + classesnearbyx[ind] = positions[peaknum] + # #print(ind ,' --------------------------------------here -----------------------------') + except ValueError: + classesnearby.append(cl) + classesnearbyx.append(positions[peaknum]) + classesnearbypccl.append(pcclasses[peaknum]) + # #print('classesnearby after a peak', classesnearby) + # for clnum, cls in enumerate(classesnearby): ## deleting almost identical classes (< % difference in amplitude) + # if cls == False: + # continue + # if True: + # continue + # compare = np.mean(lastofclass[cls]) + # for i in classesnearby[clnum:-1]: + # if i== False: + # continue + # if i != cls and abs(compare - np.mean(lastofclass[i])) < compare*0.01: ## + # # #print(compare) + # # #print( np.mean(np.vectorize(lambda peak: peak.height)(lastofclass[i]))) + # clindex = classesnearby.index(cls) + # classesnearby[clindex] = False + # classesnearbyx[clindex] = False + # del lastofclass[cls] + # del lastofclassx[cls] + # # cl = holdlastcl + # # if cl == cls: + # + # + # #print('combinedsomeclasses that were similar', cl, cls) + time2 = time.time() + # awc_btime.append(time2-time1) #2 + # classesnearby = [cls for cls in classesnearby if cls != False] + # classesnearbyx = [clx for clx in classesnearbyx if clx != False] + # + # + #print('awc_btime ', awc_btime , ' newpeak-------------------------------------------------------- :') + peaklist.lastofclass = lastofclass + peaklist.lastofclassx = lastofclassx + peaklist.classesnearby = classesnearby + peaklist.classesnearbyx = classesnearbyx + peaklist.classlist = classes # np.vectorize(lambda peak: peak.cl, otypes=[object])(peaklist.list) + peaklist.classamount = classamount + peaks = np.append(peaks,classes[None,:], axis = 0) + return peaks, peaklist + +def joincc(peaklist,peaks): + # peaklist = peaks.list + joinedsome = False + classlist = peaks[4] + peaksofclass = {} + last = [] + connect = {} #connect classes in connect+ + classcount = dict.fromkeys(classlist, 0) + ##print(classcount) + #classcount = [0]*len(np.unique(classlist)) + # #print(np.unique(classlist)) + for cl in np.unique(classlist): + peaksofclass[cl]= peaks[:,classlist == cl] + for i in range(len(peaks[0])): # i is the increasing index of the peaks + p = peaks[:,i] + poc = peaksofclass[p[4]] + classcount[p[4]]+=1 + countclass = p[4] #the current class before it might be changed to the connected class + if p[4] in connect: + p[4] = connect[p[4]] #peakclass is changed to connected class + # #print('changed ', countclass, 'to', p.cl) + joinedsome = True + + if len(poc) == classcount[countclass]: #the current peak is last peak of its class + last = poc[-len(poc) if len(poc) <= 5 else 5:] #the last peaks of the class + # #print('last: ', last) + #mean_last = np.mean(np.vectorize(lambda peak: peak[2])(last)) + mean_last = np.mean(last[2,:]) + nextfirst = {} # the first peaks of the next coming class(es) + # #print('class: ', countclass, 'at x = ', p.x, 'mean_last: ', mean_last) + for nexti in range(20): # the next 10 peaks are considered if 
they belong to the same classe + if i + nexti >= len(peaks[0]): break + inextp = peaks[:,i+nexti] + if classcount[inextp[4]] == 0: #current peak is first peak of its class + # #print('found a new begin! its class:' , inextp.cl) + ponc = peaksofclass[inextp[4]] # + nextfirst[inextp[4]] = ponc[0:len(ponc) if len(ponc) <= 5 else 5] + # #print(np.mean(np.vectorize(lambda peak: peak.height)(nextfirst[inextp.cl]))) + # #print(nextfirst) + compare = 1 + c = 0 + nextclass = -1 + for nextcl, first in nextfirst.items(): + mean_nextfirst = np.mean(first[2,:])#np.mean(np.vectorize(lambda peak: peak.height)(first)) + # #print(mean_nextfirst) + error = abs(mean_nextfirst - mean_last)/(mean_nextfirst) + if error < 1: + if compare < error: + continue + compare = error + if nextcl in connect: #if the peak that ist considered belongs to a class, that is already supposed to be connected to the current class + pocc = peaksofclass[connect[nextcl]] #peaks of the currently supposed connected class + if ( abs(mean_nextfirst - np.mean(pocc[-len(pocc) if -len(pocc) <= 5 else 5:][2])) + < abs(mean_nextfirst - mean_last) ): + continue + nextclass = nextcl + if nextclass != -1: + connect[nextclass] = p[4] + # #print('connect ', p.cl , ' and ', nextcl) + for cl in peaklist.classesnearby: + if cl in connect: + # #print('cl, connect', cl, connect[cl]) + peaklist.classesnearby[peaklist.classesnearby.index(cl)] = connect[cl] + peaklist.lastofclass[connect[cl]]=peaklist.lastofclass[cl] + peaklist.lastofclassx[connect[cl]]= peaklist.lastofclassx[cl] + peaklist.classlist = peaks[4] + return joinedsome + # for poc in peaksofclass: + # if len(poc) >= 3: + # newlast = poc[-3:] + # first = poc[:3] + # else: + # newlast = poc[-len(poc):] + # first = poc[:len(poc)] + # if last != []: + # if abs(np.mean(first) - np.mean(last)) < 0: + # #print('oh') + +def discardwaves_refactor(peaks, data): + + deleteclasses = [] + for cl in np.unique(peaks[3]): + peaksofclass = peaks[:,peaks[3] == cl] + isi = np.diff(peaksofclass[0]) + isi_mean = np.mean(isi) + # #print('isismean',isi_mean) + widepeaks = 0 + # #print('width',peaksofclass[2].width) + isi_tenth_area = lambda x, isi:np.arange(np.floor(x-0.1*isi),np.ceil(x+0.1*isi),1, dtype = np.int) + for p in peaksofclass.T: + data = np.array(data) + try: + for dp_around in data[isi_tenth_area(p[0],isi_mean)]:#np.floor(p[0]-0.1*isi_mean), np.ceil(p[0]+0.1*isi_mean),1)]:# + if dp_around <= p[1]-p[2]: + break + except IndexError: + pass + else: + widepeaks+=1 + ## p.isreal_pleateaupeaks() + if widepeaks > len(peaksofclass)*0.5: + deleteclasses.append(cl) + for cl in deleteclasses: + peaks = peaks[:,peaks[3]!=cl] + return peaks + +def smallclassdiscard(peaks, mincl): + classlist = peaks[3] + smallclasses = [cl for cl in np.unique(classlist) if len(classlist[classlist + == cl]) < + mincl] + delete = np.zeros(len(classlist)) + for cl in smallclasses: + delete[classlist == cl] == 1 + peaks = peaks[:,delete != 1] + return peaks + +def makepeak(data_x,cutsize, maxwidth, peakx, ltr, data_ltr, rtr, data_rtr, num, minhlr): + #if len(data) > peakx + cutsize/2: + return Peak(peakx, data_x, maketr(data_ltr, ltr), maketr(data_rtr, rtr), maxwidth, num, minhlr)#data[peakx-cutsize/2:peakx+cutsize/2], num) + #else: + # return Peak(peakx, data[peakx], + # maketr(data, ltr), + # maketr(data, rtr), + # maxwidth, + # #data[peakx-cutsize/2:-1], + # num) + +def maketr(data_x, x): + if x is not None: + return Tr(x,data_x) + else: + return None + +def makepeaklist(pkfirst, data, pk, tr, cutsize, maxwidth): + peaklist = 
np.empty([len(pk)], dtype = Peak) + trtopk = pkfirst + pktotr = 1-pkfirst + trlen = len(tr) + pklen = len(pk) + minhlr = lambda i, mwl, mwr : min( + abs( data[pk[i]] - min( data[pk[i]-mwl:pk[i]] ) if len(data[pk[i]-mwl:pk[i]]) > 0 else 0 ) + , + abs( data[pk[i]]- min( + data[pk[i]:pk[i]+mwr] ) if len(data[pk[i]:pk[i]+mwr]) > 0 else 0 ) + ) + #print(min( data[pk[0]-0:pk[2]]) ) + + if pktotr == 0: + peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], None, None, tr[pktotr], data[pktotr], 0, minhlr(0, 0, maxwidth)) + else: + peaklist[0] = makepeak(data[0], cutsize, maxwidth, pk[0], + tr[-trtopk], + data[-trtopk], tr[pktotr], data[pktotr], + 0, minhlr(0, min(maxwidth, + pk[0]-tr[-trtopk]) , maxwidth)) + for i in range(1,pklen-1): + peaklist[i] = makepeak(data[pk[i]], cutsize, maxwidth, pk[i], tr[i-trtopk], data[tr[i-trtopk]], tr[i+pktotr],data[tr[i+pktotr]], i, minhlr(i, maxwidth, maxwidth)) + if pktotr == 0 and pklen <= trlen: + peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1], data[pklen-trtopk-1], tr[pklen+pktotr-1], data[pklen+pktotr-1], i, minhlr(pklen-1, maxwidth, min(maxwidth, tr[pklen+pktotr-1]-pk[pklen-1]))) + else: + peaklist[pklen-1] = makepeak(data[pk[pklen-1]],cutsize, maxwidth, pk[pklen-1], tr[pklen-trtopk-1],data[pklen-trtopk-1], None, None, pklen-1, minhlr(pklen-1, maxwidth, 0)) + return peaklist + +#def doublepeaks(peaks, peakwidth): +# dif2 = peaks[1].x-peaks[0].x +# if dif2 > 5* peakwidth: +# peaks[0].real = False +# for i in range(1,len(peaks)-1): +# dif1 = dif2 +# dif2 = peaks[i+1].x-peaks[i].x +# if dif1 > 5* peakwidth and dif2 > 5* peakwidth: +# peaks[i].real = False +# if dif2 > 5* peakwidth: +# peaks[len(peaks)-1] = False +# return peaks + +def discardunrealpeaks(peaklist): + peaks = peaklist[:][np.vectorize(lambda peak: peak.real, otypes=[object])(peaklist) == True] + for i, p in enumerate(peaks): + pass + # p.num = i + return peaks + +def discardnearbypeaks(peaks, peakwidth): + peaksx = xarray(peaks) + pkdiff = np.diff(peaksx) + # peakwidth = avg_peakwidth(pknum,tr) + pknumdel= np.empty(len(peaksx)) + pknumdel.fill(False) +# peaksy = yarray(peaks) + peaksh = heightarray(peaks) + for i,diff in enumerate(pkdiff): + # #print(peaks[i].height) + if diff < peakwidth: #* peaks[i].height: ### Trial Error + if peaksh[i+1] > 1.01 *peaksh[i] : + pknumdel[i] = True + else: + # print(peaksh[i],peaksh[i+1]) + pknumdel[i+1] = True + peaks = peaks[pknumdel!=True] + for i, p in enumerate(peaks): + p.num = i + return peaks + +def interpol(data, kind): + #kind = 'linear' , 'cubic' + width = len(data) + x = np.linspace(0, width-1, num = width, endpoint = True) + return interp1d(x, data[0:width], kind , assume_sorted=True) + +def cutcenter(peak): + p = peak + cut = p.cut + pl=p.distancetoltr + pr=p.distancetortr + if pl is None: + pl = 10 + tx = p.x-10 + else: tx = p.ltr.x + if pr is None: + pr = 10 + if pl < p.maxwidth and pr > 1: + + width=len(cut) + # #print('distancetoltr',pl) + peakshape = cut + interpolfreq = 1 + xnew = np.linspace(0,len(peakshape)-1, len(peakshape)*interpolfreq, endpoint= True) + curvyf = interpol(peakshape) + curvy= curvyf(xnew) + #px = p.cutsize/2 * 4 + #left = px - (5*4) + #plt.plot(xnew, curvy) + #x_0 = optimize.fsolve(curvyf, 1.0) + # f = interp1d(x, y) + # f2 = interp1d(range(width), data[x:x+width], kind='cubic') + ##xnew = np.linspace(0, width-1, num = width*4, endpoint = True) + ##print(xnew) + # plt.plot(xnew,f2(xnew)) + ##print("show") + #plt.show + trx = (p.cutsize/2 - (p.x - tx) ) + if trx >0 : + 
xstart = trx + else: + xstart = 0 + # #print('pkx: ', p.x, 'ltrx: ', p.ltr.x) + # #print('trx in intpol', x) + x = xstart + if curvyf(x) < 0: + left = 0 + right= 0 + while(x < width-1 and curvyf(x) < 0) : + left = x + # #print(curvyf(x)) + x+=0.25 + right = x + # #print('x: ', x , 'left, right: ', curvyf(left), curvyf(right)) + x = left+(1-curvyf(right)/(curvyf(right)-curvyf(left)))*1/interpolfreq + # #print(x) + else: + x = 0 + # #print(x_int) + # plt.scatter(xstart, curvyf(xstart), marker = 'x', s=150, zorder=2, linewidth=2, color='red') + # plt.scatter(x, curvyf(x), marker='x', s=150, zorder=2, linewidth=2, color='black') + # plt.show + # #print(x_int) + #p.relcutcenter = (p.ltr.x + x_int)-p.x + ##print('cent',p.relcutcenter) + #return (p.ltr.x + x_int)-p.x + + # while(data[x]>0) + else: + x= 0 + + return x + +def relcutarray(peaks): + return np.vectorize(lambda peak: peak.relcutcenter)(peaks) + +def xarray(peaks): + if len(peaks)>0: + peakx = np.vectorize(lambda peak: peak.x)(peaks) + return peakx + else: return [] + +def yarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.y)(peaks) + else: return [] + +def heightarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.height)(peaks) + else: return [] + +def clarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.cl)(peaks) + else: return [] +def pcclarray(peaks): + if len(peaks)>0: + return np.vectorize(lambda peak: peak.pccl)(peaks) + else: return [] + +def peakxarray( ): + peakx = np.empty([len]) + peakx = np.vectorize(lambda peak: peak.x)(peaks) + return peakx + +def peakyarray( ): + peaky= np.empty([len]) + return np.vectorize(lambda peak: peak.y)(peaks) + + +def classify( ): + #template = peaks[0] + meanfit = np.mean(np.vectorize(fit, otypes=[object])(template,peaks)) + for p in peaks: + if fit(template,p) < meanfit: + # #print('classified ', fit(template,p) , ' meanfit: ' , meanfit) + p.currentclass = 1 + +def classifyhiker(template, peaks): + meanfit = np.mean(np.vectorize(fitinterpol2, otypes=[object])(template,peaks)) + #toclassify = peaks.tolist() + firstnot = 0 + for c in range(1,5): + first = True + template = peaks[firstnot] + for i, p in enumerate(peaks[firstnot:]): + if p.currentclass == 0: + if fitinterpol2(template,p) < meanfit: + # #print('peak number ' , i, 'classified as ', c, fit(template,p) , ' meanfit: ' , meanfit) + p.currentclass = c + template = p + elif first == True: + # #print('peak number ' , i, 'classified as First! 
', c, fit(template,p) , ' meanfit: ' , meanfit) + firstnot = i + first = False + else: + None + ##print('peak number ' , i, 'classified as not classified!', fit(template,p) , ' meanfit: ' , meanfit) + return peaks + + + # def Templatefitnext( , number, templnum): + # for p in peaks: + # if fit(peaks[templnum], p) < fitparameter: + +def cut_snippets(data, peaklist, rnge): + snippets = [] + positions = xarray(peaklist) + heights = heightarray(peaklist) + for pos in positions: + snippets.append(data[(pos+rnge[0]):(pos+rnge[1])]) + scaledsnips = np.empty_like(snippets) + for i, snip in enumerate(snippets): + top = -rnge[0] + # plt.plot(snip) + scaledsnips[i] = snip * 1/heights[i] + #plt.plot(scaledsnips[i]) + # print('plted') +# plt.show() + #print('1') + alignedsnips = np.empty((len(snippets), (rnge[1]-rnge[0])*10-30-10)) + standardized = np.empty((len(snippets), (rnge[1]-rnge[0])*10-10)) + intfact = 10 + for i, snip in enumerate(scaledsnips): + if len(snip) < ((rnge[1]-rnge[0])): + if i == 0: + snip =np.concatenate([np.zeros([((rnge[1]-rnge[0]) - len(snip))]),np.array(snip)]) + if i == len(scaledsnips): + snip = np.concatenate([snip, np.zeros([((rnge[1]-rnge[0])-len(snip))])]) + else: + # print('this') + snip = np.zeros([(rnge[1]-rnge[0])]) + interpoled_snip = interpol(snip)(np.arange(0, len(snip)-1, 1/intfact)) if len(snip) > 0 else np.zeros([(rnge[1]-rnge[0]-1)*intfact ]) #interpolfactor 10 + + intsnipheight = np.max(interpoled_snip) - np.min(interpoled_snip) + if intsnipheight == 0: + intsnipheight = 1 + interpoled_snip = (interpoled_snip - max(interpoled_snip))* 1/intsnipheight + standardized[i] = interpoled_snip + #print('2') + mean = np.mean(standardized, axis = 0) + #plt.plot(mean) +# plt.show() + #plt.plot(mean[10*-rnge[0]-10*5:-10*rnge[1]+21]) +# plt.show() + meantop = np.argmax(mean) + for i, snip in enumerate(standardized): + #plt.show() + interpoled_snip = snip #standardized[i] + cc = crosscorrelation(interpoled_snip[15:-15], mean) + #cc = crosscorrelation(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31], mean[10*-rnge[0]-10*7:-10*rnge[1]+31]) + #plt.plot(interpoled_snip[15 + 10*-rnge[0]-10*7:-15+ -10*rnge[1]+ 31]) + #top = np.argmax(interpoled_snip) + #offset = meantop - top + #if not(-15 <= offset <= 15): offset = 0 + offset = -15 + np.argmax(cc) + interpoled_snip = interpoled_snip[15-offset:-15-offset] if offset != -15 else interpoled_snip[30:] + #print(offset) + #plt.plot(interpoled_snip) + if len(interpoled_snip[~np.isnan(interpoled_snip)])>0: + alignedsnips[i] = interpoled_snip + #plt.show() + # print('3') + return snippets, alignedsnips + + + +def fit(templ, peak): + fit = np.sum(np.square(templ.cut - peak.cut)) + return fit + +def fitinterpol2(templ,peak): + t = templ + p = peak + if p.real and t.real: + fit = np.sum(np.square(t.cutaligned-p.cutaligned)) + else: + fit = 0 + return fit + + + +def fitinterpol( templ, peak): + t = templ + p = peak + if p.real: + centerp = cutcenter(p) + centert = cutcenter(t) + shiftp = centerp-p.cutsize/2 + shiftt = centert-t.cutsize/2 + + if shiftp > -5: + shiftp = min(5, 5+centerp-p.cutsize/2) + else: shiftp = 0 + + if shiftt > -5: + shiftt = min(5, 5+centert-t.cutsize/2) + else: shiftt = 0 + + xnew = np.linspace(0,p.cutsize-11, (p.cutsize-1) * 4,endpoint = True) + #peak_interpoled = interpol(p.cut)(xnew) + #plt.plot(xnew, interpol(p.cut)(xnew+shift)) + # #print(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp)) + fit = np.sum(np.square(interpol(templ.cut)(xnew+shiftt)-interpol(p.cut)(xnew+shiftp))) + else: + fit = 
0 + return fit + + +def plotdata(peaks, data): + x = xarray(peaks) + y = yarray(peaks) + plt.plot(range(len(data)),data) + plt.plot(x, y, '.r', ms=20) + #for p in peaks: + # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) + # plt.plot(tr, data[tr], '.g', ms=20) + plt.show() + + +def plotdatabyx(peaksx, data): + x = peaksx + y = data[peaksx] + plt.plot(range(len(data)),data) + plt.plot(x, y, '.r', ms=20) + plt.show() + #for p in peaks: + # #print(p.height, p.x, p.y, p.distancetoltr, p.distancetortr, p.nexttrdistance) + # plt.plot(tr, data[tr], '.g', ms=20) + +def plotpeak(peaks): + #plt.plot(peaks), cutpeaks) #bei betrachtung aller blocks zu groß! + for p in peaks: + plt.plot(range(p.cutsize),p.cut) + #plt.plot(pk, x[pk] , '.r', ms=20) + plt.show() + + +def periodicinclass(peaks, cl): + noiselist = [] + classlist = np.vectorize(lambda peak: peak.cl, otypes=[object])(peaks) + peaks = xarray(peaks) + peaks = peaks[:][classlist == cl] + periodic = [] + periodiccollector = [] + error2 = [] + isperiodic = True + b=1 + c=2 + ctofar = False + compdif = 0 + dif = 0 + count = 1 + foundtriple = False + next = 0 + for i in range(len(peaks)-1): + if i != next: continue + # #print(i, 'foundtriple', foundtriple) + error2 = [] + b=1 + c=0 + A = peaks[i] + B = peaks[i+b] + compdif = dif + while foundtriple == True and count <= 3 and i+1 < len(peaks)-1: + while B-A < compdif*1.5 and i+b+1 < len(peaks)-1: + # #print('newdif: ', B-A, 'olddif:' , dif) + if abs((B-A) - compdif) < compdif*0.4: + error2.append(abs((B-A) - dif)) + b+=1 + B = peaks[i+b] + if len(error2) > 0: + bestB = error2.index(min(error2)) + B = peaks[i+1 + bestB] + periodic.append(B) + dif = 0.5*(dif + (B-A)) + # #print('match found') + b = 1+bestB + break + else: + count+=1 + compdif = dif*count + else: + if foundtriple == True: + # #print('no further match found, ') + isperiodic = False + + + + + while foundtriple == False and i+c< len(peaks)-1: + while i+c < len(peaks)-1: + A = peaks[i] + B = peaks[i+b] + C = peaks[i+c] + dif1 = B - A + dif2 = C - B + if (C-B > (B-A)*1.5): + break + if abs(dif1 - dif2) < dif1*0.4: + error2.append(abs(dif1-dif2)) + c +=1 + #C = peaks[i+c] # C weiterlaufenlassen, bis zu weit + else: + if len(error2) == 0: + # #print('no triple found') + isperiodic = False + if len(error2) > 0: + bestC = error2.index(min(error2)) + C = peaks[i+2 + bestC] + c = 2+ bestC + periodic.extend((A,B,C)) + dif1 = B - A + dif2 = C - B + # #print('dif1: ', dif1, 'dif2: ', dif2) + dif = 0.5*(dif2+dif1) + foundtriple = True + # #print('triple found', i+c, 'dif : ', dif) + else: + error2 = [] # B weiterlaufen lassen, C reset auf B+1 + b +=1 + c = b+1 + + if isperiodic == False: + if len(periodic) > 3: + periodiccollector.append(periodic) + isperiodic = True + periodic = [] + if c!=0: + next = i+c + else: + next = i+b + if len(periodiccollector) > 0: + # for i in range(len(periodiccollector)): + # #print('collector ', i, periodiccollector[i]) + return periodiccollector + else: + #print('no periodicity found') + return [] + + + +def noisediscard(peaklist, tsh_n, ultimate_threshold): + detected_noise = False + ##print('noisetsh: ', tsh_n) + for p in peaklist.list: + + if p.height < tsh_n or p.height < ultimate_threshold: + p.noise = True + detected_noise = True + peaklist.list = peaklist.list[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaklist.list) == False] + # #print(peaks) + # for cl in classlist: + # diff = np.vectorize(lambda peak: peak.x, otypes=[object])(peaks[:][classlist == cl]) + # 
meandiff = np.mean(diff) + # msecompare = np.mean(np.square(diff-(diff*0.8))) + # mse = np.mean(np.square(diff-meandiff)) + # if mse > msecompare: + # noiselist.append(cl) + # for p in peaks: + #if p.cl in noiselist: + # if p.height < 0.1: + # p.noise = True + # peaks = peaks[:][np.vectorize(lambda peak: peak.noise, otypes=[object])(peaks) == False] + # return peaks + return detected_noise + + +def plotPCclasses(peaks, data): + plt.plot(range(len(data)),data, color = 'black') + classlist = np.vectorize(lambda peak: peak.pccl, otypes=[object])(peaks) + cmap = plt.get_cmap('jet') + colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) + np.random.seed(22) + np.random.shuffle(colors) + colors = [colors[cl] for cl in np.unique(classlist)] + print('classlist', np.unique(classlist)) + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) + # x=0 +# if len(classlist)>0: + # #print(classlist) + # #print('classes: ' , np.unique(classlist)) + #from collections import Counter + #count = Counter(classlist) + # #print('longest class: ', count.most_common()[0]) + for num, color in zip(np.unique(classlist), colors): + if num == -1 : + color = 'black' + peaksofclass = peaks[:][classlist == num] + #xpred = linreg_pattern(peaksofclass[0:3]) + #for p in peaksofclass[0:3]: + # #print(p.x) + ##print(xpred, peaksofclass[3].x) + + #if len(peaksofclass) > 1000: + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) + #else: + print(num) + plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = color, ms =20) + plt.scatter(xarray(peaksofclass), heightarray(peaksofclass)) + # for p in peaks: + # plt.text(p.x, p.y, p.num) + #plt.show() + + # plt.show() + plt.close() + +def plotampwalkclasses_refactored(peaks, data): + plt.plot(range(len(data)),data, color = 'black') + classlist = np.array(peaks[3],dtype=np.int) + cmap = plt.get_cmap('jet') + colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) + np.random.seed(22) + np.random.shuffle(colors) + colors = [colors[cl] for cl in np.unique(classlist)] + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.c', ms=20) + # x=0 +# if len(classlist)>0: + # #print(classlist) + # #print('classes: ' , np.unique(classlist)) + #from collections import Counter + #count = Counter(classlist) + # #print('longest class: ', count.most_common()[0]) + for cl, color in zip(np.unique(classlist), colors): + peaksofclass = peaks[:,classlist == cl] + #xpred = linreg_pattern(peaksofclass[0:3]) + #for p in peaksofclass[0:3]: + # #print(p.x) + ##print(xpred, peaksofclass[3].x) + + #if len(peaksofclass) > 1000: + # plt.plot(xarray(peaksofclass), yarray(peaksofclass), '.', color = 'red', ms =20) + #else: + + plt.plot(peaksofclass[0],peaksofclass[1], '.', color = color, ms =20) + plt.scatter(peaksofclass[0], peaksofclass[2]) + # for p in peaks: + # plt.text(p.x, p.y, p.num) + plt.show() + + # plt.show() + plt.close() + + +def crosscorrelation(sig, data): + autocorr = signal.fftconvolve(data, sig[::-1], mode='valid') + return autocorr + +def plottemplatefits(data, peaks, tr, templnum): + # + plotdata(peaks, data, tr) + plt.plot(range(len(data)),data) + classes = np.vectorize(lambda peak: peak.currentclass, otypes=[object])(peaks) + class1 = peaks[:][classes == 1 ] + if len(class1) > 0: + plt.plot(xarray(class1), yarray(class1), '.r', ms=20) + class2 = peaks[:][classes == 2 ] + if len(class2) > 0: + plt.plot(xarray(class2), yarray(class2), '.g', ms=20) + class3 = peaks[:][classes == 3 ] + if len(class3) > 0: + 
plt.plot(xarray(class3), yarray(class3), '.c', ms=20) + class4 = peaks[:][classes == 4 ] + if len(class4) > 0: + plt.plot(xarray(class4), yarray(class4), '.y', ms=20) + + # for p in peaks: # <-- + # plt.text(p.x , p.y, p.num) + + # plt.plot(tr, data[tr], '.g', ms=20) + plt.show() + +def linreg_pattern(peaks): + from sklearn import datasets, linear_model + from sklearn.metrics import mean_squared_error, r2_score + + peaksx = xarray(peaks) + peaksx = peaksx.reshape(-1,1) + #peaksh = heightarray(peaks) + #peakx = peak.x + # Create linear regression object + regr = linear_model.LinearRegression() + numbers = np.arange(len(peaks)).reshape(-1,1) + # Train the model using the training sets + regr.fit(numbers, peaksx) + + # Make predictions using the testing set + peakx_pred = regr.predict(len(peaks)) + # # The coefficients + # #print('Coefficients: \n', regr.coef_) + # # The mean squared error + # #print("Mean squared error: %.2f" + # % mean_squared_error(diabetes_y_test, diabetes_y_pred)) + # # Explained variance score: 1 is perfect prediction + # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) + + + # Plot outputs + #plt.scatter(peaksx, peaksh, color='black') + #plt.scatter(peakx, peakh_pred, color='blue') + + #plt.xticks(()) + #plt.yticks(()) + + # plt.show() + + return peakx_pred + +def linreg(peaks, peak): + from sklearn import datasets, linear_model + from sklearn.metrics import mean_squared_error, r2_score + + peaksx = xarray(peaks) + peaksx = peaksx.reshape(-1,1) + peaksh = heightarray(peaks) + peakx = peak.x + # Create linear regression object + regr = linear_model.LinearRegression() + + # Train the model using the training sets + regr.fit(peaksx, peaksh) + + # Make predictions using the testing set + peakh_pred = regr.predict(peakx) + + # # The coefficients + # #print('Coefficients: \n', regr.coef_) + # # The mean squared error + # #print("Mean squared error: %.2f" + # % mean_squared_error(diabetes_y_test, diabetes_y_pred)) + # # Explained variance score: 1 is perfect prediction + # #print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred) + + + # Plot outputs + #plt.scatter(peaksx, peaksh, color='black') + #plt.scatter(peakx, peakh_pred, color='blue') + + #plt.xticks(()) + #plt.yticks(()) + + # plt.show() + + + + return peakh_pred + +def wp_transform(x): + import pywt + wp = pywt.WaveletPacket(data=x, wavelet='haar', mode='symmetric') + print('maxlevel: ', wp[''].maxlevel) + return (np.array([node.data for node in wp.get_level(wp[''].maxlevel, 'freq')])).flatten() + +def wpfeats(snips): + size = len(wp_transform(snips[0])) + wp = np.empty([len(snips), size]) + for i, snip in enumerate(snips): + print(wp_transform(snip)) + wp[i] = (wp_transform(snip)) + #wp = wp.T + print(wp[0]) + wpcoef = wp.T + print(wp[0]) + from sklearn.preprocessing import StandardScaler + wpcoef = StandardScaler().fit_transform(wpcoef) + coeffvalues = [] + for coeff in wpcoef: + stat, crit, sig = stats.anderson(coeff, dist = 'norm') + # coeffvalues.append(stat) + coeffvalues.append(np.sum(np.abs(coeff))) + coeffvalues = np.array(coeffvalues) + coeffs = np.argsort(coeffvalues)[::-1][:10] + print(coeffvalues[coeffs]) + return wp.T[coeffs] + + + + +def pc(cutsnippets, peaklist): + # (observations, features) matrix + M = np.empty([len(cutsnippets), len(cutsnippets[0])]) + for i, snip in enumerate(cutsnippets): + M[i] = snip[:] + from sklearn.preprocessing import StandardScaler + StandardScaler().fit_transform(M) + # #print(M.shape, ' Mshape') + # singular value decomposition 
factorises your data matrix such that: + # + # M = U*S*V.T (where '*' is matrix multiplication) + # + # * U and V are the singular matrices, containing orthogonal vectors of + # unit length in their rows and columns respectively. + # + # * S is a diagonal matrix containing the singular values of M - these + # values squared divided by the number of observations will give the + # variance explained by each PC. + # + # * if M is considered to be an (observations, features) matrix, the PCs + # themselves would correspond to the rows of S^(1/2)*V.T. if M is + # (features, observations) then the PCs would be the columns of + # U*S^(1/2). + # + # * since U and V both contain orthonormal vectors, U*V.T is equivalent + # to a whitened version of M. + + U, s, Vt = np.linalg.svd(M, full_matrices=False) + V = Vt.T + + # PCs are already sorted by descending order + # of the singular values (i.e. by the + # proportion of total variance they explain) + S = np.diag(s) + # PC = (s*V) + # PCs: + #print(U.shape) + #print(S.shape) + #print(V.shape) + #print(s[0], U[0,:]) + + #PC1 = (s[0] * U[:,0]) + #PC2 = (s[1] * U[:,1]) + #for i, p in enumerate(peaklist): + # p.pc1 = PC1[i] + # p.pc2 = PC2[i] + + #mu = peaks.mean(axis=0) + #fig, ax = plt.subplots() + #ax.scatter(xData, yData) + #for axis in U: + # start, end = mu, mu + sigma * axis + # ax.annotate( + # '', xy=end, xycoords='data', + # xytext=start, textcoords='data', + # arrowprops=dict(facecolor='red', width=2.0)) + #ax.set_aspect('equal') + #plt.show() + + + # if plot_steps: + # plt.scatter(PC1, PC2) + # plt.show() + + # PCData1 = (U[:,0]*M) + # PCData2 = (U[:,1]*M) + # plt.scatter(PCData1, PCData2) + # plt.show() + + #plt.scatter(U[:,0],U[:,1]) + #plt.show() + #print('done') + #return PC + + # if we use all of the PCs we can reconstruct the noisy signal perfectly + #Mhat = np.dot(U, np.dot(S, V.T)) + #print('Using all PCs, MSE = %.6G' %(np.mean((M - Mhat)**2))) + + #plt.show() + return S@U.T + +def gettime(x, samplerate, starttime): + startm = int(starttime[-2:]) + starth = int(starttime[:-2]) + seconds = x/samplerate + m, s = divmod(seconds, 60) + m = m + startm + h, m = divmod(m, 60) + h = h+starth + return "%d:%02d:%02d" % (h, m, s) + +def connect_blocks(oldblock): + newblock = Peaklist([]) + newblock.lastofclass = oldblock.lastofclass + newblock.lastofclassx = oldblock.lastofclassx + newblock.classesnearby = oldblock.classesnearby + newblock.classesnearbypccl = oldblock.classesnearbypccl + newblock.classesnearbyx = [clnearbyx - oldblock.len for clnearbyx in oldblock.classesnearbyx] + newblock.classamount = oldblock.classamount + return newblock + ##print('classesnearbyx! 
old, new ' , oldblock_len,oldblock.classesnearbyx , newblock.classesnearbyx) + +if __name__ == '__main__': + main() + + + +# deleted Code, but unsure if really want to delete: + + #nix #print( b.data_arrays) + + # for cl in np.unique(cllist): + + # currentfish_x = x[:][cllist == cl] + # currentfish_y = y[:][cllist == cl] + # currentfish_h = x[:][cllist == cl] + + + #nix try: + #nix xpositions[cl] = b.create_data_array("f%d_eods" %cl, "spiketimes", data = currentfish_x) + #nix xpositions[cl].append_set_dimension() + #nix # thisfish_eods = b.create_multi_tag("f%d_eods_x"%cl, "eods.position", xpositions[cl]) + #nix # thisfish_eods.references.append(nixdata) + #nix except nix.pycore.exceptions.exceptions.DuplicateName: + #nix + #nix xpositions[cl].append(currentfish_x) + + + #thisfish_eods.create_feature(y, nix.LinkType.Indexed) + #b.create_multi_tag("f%d_eods_y"%cl, "eods.y", positions = y) + #b.create_multi_tag("f%d_eods_h"%cl, "eods.amplitude", positions = h) + #thisfish_eods.create_feature + + + + +# in analyseEods +# in analyseEods classlist = eods[3] #np.vectorize(lambda peak: peak.cl, otypes=[object])(worldpeaks.list) +# in analyseEods fishclass = {} +# in analyseEods #print('classlist: ', classlist) +# in analyseEods # #print('Classes at end: ', np.unique(classlist)) +# in analyseEods +# in analyseEods +# in analyseEods fishes = {} +# in analyseEods for num in np.unique(classlist): +# in analyseEods fishes[num] = eods[:,:][: , classlist == num] +# in analyseEods +# in analyseEods +# in analyseEods +# in analyseEods +# in analyseEods fishes = fill_hidden_3(fishes) # cl-dict : x y z -dict +# in analyseEods #maxlencl = max(fishes, key=lambda k: fishes[k]['x'][-1]-fishes[k]['x'][0]) +# in analyseEods +# in analyseEods fishes, weirdparts = fill_holes(fishes) +# in analyseEods fishes, weirdparts = fill_holes(fishes) +# in analyseEods +# in analyseEods for cl in np.unique(classlist): +# in analyseEods isi = [isi for isi in np.diff(fishes[cl]['x'])] +# in analyseEods fishes[cl][3]= isi +# in analyseEods + + +#npFish +#npFish npFishes = {} +#npFish fishfeaturecount = len(fishes[cl]) +#npFish for cl in np.unique(classlist): +#npFish npFishes[cl]= np.zeros([fishfeaturecount, len(fishes[cl]['x'])]) +#npFish for i, feature in enumerate(['x', 'y', 'h', 'isi']): #enumerate(fishes[cl]): +#npFish if feature == 'isi': +#npFish fishes[cl][feature].append(fishes[cl][feature][-1]) +#npFish # #print(feature, cl) +#npFish npFishes[cl][i] = np.array(fishes[cl][feature]) +#npFish # #print(npFishes[classlist[0]][0]) +#npFish # #print(npFishes[classlist[0]][2]) +#npFish # #print(npFishes[classlist[0]][3]) +#npFish #np.savetxt('worldpeaks_x_y_cl_2', (x,y,cl, isi), fmt="%s") +#npFish +#npFish np.set_printoptions(threshold=np.nan) +#npFish +#npFish for i, cl in enumerate(np.unique(classlist)): #Neue Klassennamen! +#npFish x = npFishes[cl][0] +#npFish y = npFishes[cl][1] +#npFish h = npFishes[cl][2] +#npFish isi = npFishes[cl][3] +#npFish +#npFish np.savetxt(filename[:-4]+'Fish_xyhisi_cl%d' % i, npFishes[cl], fmt="%s") +#npFish +#npFish +#npFish + + + + + + # / TODO: Peakclassifikator bei weit wegliegenden klassen? Done + # / TODO: Class2 implementation auf class linreg übertragen Done - Doof + # TODO: Klassen zusammenfuegen/ Noise zusammenfuegen + # - Wenn last 3 und first 3 zueinander passen in 1. Amplitude und 2. Periode (falls peaks) oder 2. randomzeugs? - Noiseerkennung und 2. 
Amplitude + # TODO: Klassen filtern auf Patternausreißer + # diff --git a/thunderfish/DextersThunderfishAddition/analyzeEods.py b/thunderfish/DextersThunderfishAddition/analyzeEods.py new file mode 100644 index 00000000..f75339d5 --- /dev/null +++ b/thunderfish/DextersThunderfishAddition/analyzeEods.py @@ -0,0 +1,1104 @@ +import sys +import numpy as np +import copy +from scipy.stats import gmean +from scipy import signal +from scipy import optimize +import matplotlib.pyplot as plt +import matplotlib.colors as mplcolors +from thunderfish.dataloader import open_data +from thunderfish.peakdetection import detect_peaks +from scipy.interpolate import interp1d +from scipy.signal import savgol_filter +from collections import deque +import nixio as nix +import time +import os +import pickle + +deltat = 60.0 # seconds of buffer size +thresh = 0.05 +mind = 0.1 # minimum distance between peaks +peakwidththresh = 30 # maximum distance between max(peak) and min(trough) of a peak, in datapoints +new = 0 + +def main(): ############################################################# Get arguments eodsfilepath, plot, (opt)save, (opt)new + + filepath = sys.argv[1] + sys.argv = sys.argv[1:] + + plot = 0 + save = 0 + print(sys.argv) + if len(sys.argv)==2: + plot = int(sys.argv[1]) + print(plot) + if len(sys.argv)==3: + plot = int(sys.argv[1]) + save = int(sys.argv[2]) + print('saving results: ', save) + import ntpath + if len(sys.argv)==4: + plot = int(sys.argv[1]) + save = int(sys.argv[2]) + new = int(sys.argv[3]) + print('saving results: ', save) + ntpath.basename("a/b/c") + def path_leaf(path): + head, tail = ntpath.split(path) + return tail or ntpath.basename(head) + filename = path_leaf(filepath) + prefixlen = filename.find('_')+1 + starttime = "2000" + home = os.path.expanduser('~') + path = filename[prefixlen:-4]+"/" + os.chdir(home+'/'+path) # operating in directory home/audiofilename/ + + # if os.path.exists(filename[prefixlen:-4]+'_AmpFreq4.pdf'): + # new = 0 + + with open_data(filename[prefixlen:-4]+".WAV", 0, 60, 0.0, 0.0) as data: + samplerate = data.samplerate + datalen = len(data) + + ############################################################# Fileimport and analyze; or skip, if analyzed data already exists + if new == 1 or not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes.npz"): + print('new analyse') + eods = np.load(filename, mmap_mode='c') + + # time1 = 40000 + # time2 = 45000 + # time1x = time1 * samplerate + # time2x = time2 * samplerate + # startpeak = np.where(((eods[0]>time1x)&(eods[0]= 100 and i % (classamount//100) == 0: + print(i) + fishclass = eods[:,:][: , classlist == num] + fish = [] + if len(fishclass[0]) < 12: + continue + for i , feature in enumerate(fishclass): + if i != 3: + fish.append(feature) +# print('fish - printing to check structure', fish) + temp_classisi = np.diff(fishclass[0]) + #print(temp_classisi) + #print('plot smooth vs orig', len(temp_classisi)) + binlen=10 + # temp_classisi_medians = temp_classisi#bin_median(temp_classisi, 1) + # smoothed = savgol_filter(temp_classisi_medians,11,1) + # diff = np.square(smoothed-temp_classisi_medians) + # data = np.array(diff) + # result = np.median(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) + # result2 = bin_percentilediff(temp_classisi, 20) + # if len(result) > 7 and len(result2) > 7: + # smoothedresult = savgol_filter(result, 7, 1) + # smoothedresult2 = savgol_filter(result2, 7, 1) + # else: + # smoothedresult = result + # smoothedresult2 = result2 + # 
#plt.plot(np.arange(0,len(result)*binlen, binlen),result) + # #plt.plot(smoothed) + # #plt.plot(np.arange(0,len(result2)*20, 20), smoothedresult2) + # #plt.plot(np.arange(0,len(result2)*20, 20), result2) + # # plt.plot(temp_classisi_medians) + # #plt.plot(np.arange(0, len(smoothedresult)*binlen, binlen),smoothedresult) + # noiseindice = np.where(smoothedresult > 100000) + # #print(noiseindice) + # noiseindice = np.multiply(noiseindice, binlen) + # #print(noiseindice) + # noiseindice = [x for i in noiseindice[0] for x in range(i, i+10)] + # print(np.diff(noiseindice)) + # noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) + # #print(noiseindice) + # noiseindice = [x for arr in noiseindice if len(arr) > 20 for x in arr[50:-51]] + # noiseindice= np.array(noiseindice) + # #print(noiseindice) + # fish = np.array(fish) + # # Noise delete applial + # # if len(noiseindice) >0 : + # # fish[:,noiseindice] = np.nan #np.setdiff1d(np.arange(0, len(fish[0]),1),(noiseindice))] = np.nan + # fish = list(fish) + # #plt.plot(temp_classisi) + # plt.show() + binlen = 60 + #print(len(fish[0])) + if discardcondition1(fish) == False: # condition length < 10 + # if False: + mean, std, d2, d8 = bin_array_mean(temp_classisi,binlen) + # print('mean, std, d2, d8', mean, std, d2, d8) + count = ((mean * 4 >= d8) * (d2 >= mean * 0.25)) .sum() # condition_2 : if 0.2, and 0.8 deciles of the ISI of ONE SECOND/binlen are in the area of the median by a factor of 2, then the class seems to have not too much variability. + # Problem: Case, Frequency changes rapidly during one second/binlen , then the 0.8 or 0.2 will be out of the area... + # But then there is one wrong estimation, not too much of a problem + #print('fish') + # if count >= 0.5*(len(temp_classisi)//binlen +1): + if True: + fishes.append(fish) + #print('len fishes after append', len(fishes)) + #print('printing fishes to check structure', fishes[0][0]) + # ontimes = np.load('ontime'+filename[prefixlen:-4]+'.npz') + # ontime = [] + # # for c, items in enumerate(ontimes.items()): + # # ontime.append(items[1]) + # ontime.append(ontimes['on']) + # ontime.append(ontimes['near']) + # ontime.append(ontimes['far']) + # + # if plot == 1: + # plot_ontimes(ontime) + + #print(eods[0][-1]//samplerate, len(ontime[0])) + if fishes is not None: + + #for fish in fishes: + # fish[0] + + # improving the fishpeak-data by adding peaks at places where theses peaks are hidden behind other (stronger)peaks + #fishes = fill_hidden_3(fishes, eods, filename) # cl-dict : x y z -dict + # filling holes or removing unexpected peaks from the class which are most likely caused by false classification + #fishes, weirdparts = fill_holes(fishes) + #fishes, weirdparts = fill_holes(fishes) + + if fishes is not None: + if len(fishes) > 0: + for cl, fish in enumerate(fishes): + ### Filter to only get ontimes close and nearby + for i, x in enumerate(fish[0]): + print(x) + #if x//samplerate < len(ontime[0]): +# # print(ontime[1][x//samplerate], ontime[0][x//samplerate]) + # if ontime[0][x//samplerate] != 1 and ontime[1][x//samplerate] != 1 and ontime[2][x//samplerate] != 1: + # for feat_i, feature in enumerate(fish): + # fishes[cl][feat_i][i] = np.nan + # print(x//samplerate, ' ignored') + isi = [isi for isi in np.diff(fishes[cl][0])] + isi.append(isi[-1]) + fishes[cl].append(isi) + #fishes[i] # the structure of the array fishes + # 0 x + # 1 y + # 2 h + # 3 isi + npFishes = fishes + + + # fishfeaturecount = 
len(fishes[cl]) + # for cl in range(len(np.unique(classlist))-1): + # + # fishlen = len(fishes[cl][0]) + # npFishes[cl]= np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ ".npmmp", dtype='float32', mode='w+', shape=(fishfeaturecount, fishlen), order = 'F') + # np.zeros([fishfeaturecount, len(fishes[cl]['x'])]) + # for i, feature in enumerate(['x', 'y', 'h', 'isi']): #enumerate(fishes[cl]): + # if feature == 'isi': + # fishes[cl][feature].append(fishes[cl][feature][-1]) + # npFishes[cl][i] = np.array(fishes[cl][feature]) + # + +# np.set_printoptions(threshold=np.nan) + # + if save == 1 and not os.path.exists('classes/'): + os.makedirs('classes/') + + #np.save('classes/'+ filename[prefixlen:-4]+"_class%d"%i, fish) + #print('this', len(npFishes)) + if save == 1: + with open('classes/'+ filename[prefixlen:-4]+"_classes.lst", "wb") as fp: #Pickling + pickle.dump(npFishes, fp) + #np.savez('classes/'+ filename[prefixlen:-4]+"_classes", npFishes) + else: + npFishes = [] + try: + with open('classes/'+ filename[prefixlen:-4]+"_classes.lst", "rb") as fp: #Pickling + npFishes = pickle.load(fp) + # npFishload=np.load('classes/'+ filename[prefixlen:-4]+"_classes.npz") + print('loaded classes') + except: + print('no classes found') + # for fishes in npFishload.files: + # print('loaded ', fishes) + # for fish in npFishload[fishes]: + # fishtemp = np.zeros([4,len(fish[0])]) + # for i, fishfeature in enumerate(fish): + # fishtemp[i] = fishfeature + # npFishes.append(fishtemp) + #print('npFishes to check structure', npFishes[0][0][0]) +# if not os.path.exists('classes/'): +# os.makedirs('classes/') +# if not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes_red"): +#np.save('classes/'+ filename[prefixlen:-4]+"_class%d"%i, fish) + if new == 1 or not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes_red.lst"): +# reducednpFishes = npFishes + reducednpFishes = reduce_classes(npFishes)# reducing classes by putting not overlapping classes together + #print('reduced') + if save == 1: + with open('classes/'+ filename[prefixlen:-4]+"_classes_red.lst", "wb") as fp: #Pickling + pickle.dump(reducednpFishes, fp) + #np.savez('classes/'+ filename[prefixlen:-4]+"_classes_red.npz", reducednpFishes) + else: + with open('classes/'+ filename[prefixlen:-4]+"_classes_red.lst", "rb") as fp: #Pickling + reducednpFishes = pickle.load(fp) + #print('len reduced ', len(reducednpFishes)) + if len(reducednpFishes) == 0: + print('no on-/ or nearbytimeclass with sufficient length or good enough data. 
quitting') + quit() +# reducednpFishload=np.load('classes/'+ filename[prefixlen:-4]+"_classes_red.npz") +# +# for fishes in reducednpFishload.files: +# print('loaded reduced classes') +# for fish in reducednpFishload[fishes]: +# fishtemp = np.zeros([4,len(fish[0])]) +# for i, fishfeature in enumerate(fish): +# fishtemp[i] = fishfeature +# reducednpFishes.append(fishtemp) +# +# for i, rfish in enumerate(reducednpFishes): +# if not os.path.exists('classes/'): +# os.makedirs('classes/') +# np.save('classes/'+ filename[prefixlen:-4]+"_class%d_reduced"%i, rfish) + #print('reducednpFishes to check structure', reducednpFishes[0][3]) + + + + window_freq = 1 + freqavgsecpath = filename[prefixlen:-4]+"_freqs2.npy" + if new == 1 or not os.path.exists(freqavgsecpath): + print('new freq calcing') + avg_freq = np.zeros([len(reducednpFishes),datalen//(samplerate*window_freq)+1]) + avg_isi = np.zeros([len(reducednpFishes),datalen//(samplerate*window_freq)+1]) + for i, fish in enumerate(reducednpFishes): + fish = np.array(fish) + avg_freqs_temp = [] + avg_isi_temp = [] + peak_ind = 0 + sec = 0 + for secx in np.arange(fish[0][0],fish[0][-1], samplerate*window_freq): + #count_peaks_in_second = ((secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)).sum() + # isimean_peaks_in_second = fish[3][(secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)].mean() # # # # # # # # # Using median instead of mean. Thus, hopefully overgoing outlier-isis, which are due to Peaks hidden beneath stronger Peaks of another fish. + #freq_in_bin = samplerate/isimean_peaks_in_second + sec_peaks = fish[3][(secx <= fish[0]) & (fish[0] < secx+samplerate*window_freq)] + #sec_freq = np.divide(samplerate,sec_peaks) + print(sec_peaks) + if len(sec_peaks) > 0: + #perctop, percbot = np.percentile(sec_peaks, [45, 55]) + #peakisi_in_bin = sec_peaks[(perctop>=sec_peaks)&(sec_peaks>=percbot)].mean() + #print(perctop, percbot, peaks_in_bin) + #isimean_peaks_in_bin = sec_peaks[(perctop >=sec_peaks)&(sec_peaks>=percbot)].mean() + isimean_peaks_in_bin = np.median(sec_peaks) + freq_in_bin = samplerate/isimean_peaks_in_bin + else: freq_in_bin = np.nan + ################################################################################################################################### TODO + #isimean_peaks_in_bin = np.median(fish[3][(secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)]) + print(freq_in_bin) + #freq_in_bin = count_peaks_in_second + if 5 < freq_in_bin < 140: + avg_freqs_temp.append(freq_in_bin) + else: + avg_freqs_temp.append(np.nan) + sec+=1 + #print(sec, freq_in_bin) + # avg_freqs_temp, noiseindice = noisedelete_smoothing(avg_freqs_temp, 3, 2, 100000, 1000) + #avg_freqs_temp, noiseindice = noisedelete_lowpass(avg_freqs_temp, binlen= 10) + avg_freq[i, fish[0][0]//(samplerate*window_freq) : fish[0][0]//(samplerate*window_freq)+sec] = np.array(avg_freqs_temp) + #plt.show() + + + + + + if save == 1: + np.save(freqavgsecpath, avg_freq) + else: + avg_freq = np.load(freqavgsecpath) + print('loaded freqs') + #for i in avg_isi_fish: + # print('avg_freqs_byisi') + # plt.plot(i) + #plt.xlabel('seconds') + #plt.ylabel('isi of peaks') + #plt.show() + # cmap = plt.get_cmap('jet') + # colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) + # np.random.seed(22) + # np.random.shuffle(colors) + # colors = [colors[cl] for cl in range(len(avg_freq_fish))] + # for i, col in zip(avg_freq_fish, colors): + # print('avg_freqs', 'len:' ,len(avg_freq_fish)) + # plt.plot(i, color = col) + # plt.xlabel('seconds') + # 
plt.ylabel('frequency of peaks') + # plt.show() + ## #print(avg_freqs[0]) + + + window_avg = 1 + ampavgsecpath = filename[prefixlen:-4]+'_amps2.npy' + #freqtime = np.arange(0, len(data), samplerate) + if new == 1 or not os.path.exists(ampavgsecpath): + avg_amps_temp = [] + peak_ind = 0 + + avg_amp = np.zeros([len(reducednpFishes),datalen//(samplerate*window_avg)+1]) + #avg_amp_fish = np.memmap(ampavgsecpath, dtype='float32', mode='w+', shape=(len(reducednpFishes),datalen//samplerate+1)) + + for i, fish in enumerate(reducednpFishes): + if len(fish[0]) >= 20: + #print('amp, ', i, '/', len(reducednpFishes)) + step = 0 + avg_amps_temp = [] + for secx in np.arange(fish[0][0],fish[0][-1], samplerate*window_avg): + amp_in_second = fish[2][(secx < fish[0]) & (fish[0] < secx+samplerate*window_avg)].mean() + # print(i, peak_ind, amp_in_second) + avg_amps_temp.append(amp_in_second) + step+=1 + #print('avg_amps_temp', avg_amps_temp) + #avg_amps = np.memmap(ampavgsecpath, dtype='float32', mode='w+', shape=(len(avg_amps_temp), )) + #avg_amps[:] = avg_amps_temp + + avg_amps_temp = np.array(avg_amps_temp) + avg_amps_temp[np.where(np.isnan(avg_amps_temp))] = 0.0 + avg_amp[i, fish[0][0]//(samplerate*window_avg) : fish[0][0]//(samplerate*window_avg)+step] = avg_amps_temp + + if save == 1: + np.save(ampavgsecpath, avg_amp) +# np.save(ampavgsecpath, avg_amp_fish) + # print('avg_amps ',avg_amps) + #avg_freqs.append(np.mean(eods_freq[i:i+samplerate])) + else: + #avg_amps = np.memmap(ampavgsecpath, dtype='float32', mode='r', shape=(data//samplerate)) + avg_amp = np.load(ampavgsecpath) + print('loaded amp') + + if new == 1 or plot == 1 : + # Plotting ####################################################################################################################### + ################################################################################################################################## + + import matplotlib.gridspec as gridspec + gs = gridspec.GridSpec(2, 2, height_ratios=(1, 1), width_ratios=(1, 0.02)) + + # Tuning colors + maxfreq = 140 + coloroffset = 5 + # Colorbar Choice + cmap = plt.get_cmap('magma')#'gist_rainbow') + cmap_amp = plt.get_cmap('Blues')#'gist_rainbow') + # Colorbar Workaround + Z = [[0,0],[0,0]] + min, max = (0, maxfreq) + step = 1 + levels = np.arange(min,max+step,step) + CS3 = plt.contourf(Z, levels, cmap=cmap) + plt.clf() + plt.close() + ##################### + # Colorbar Workaround + Z = [[0,0],[0,0]] + min, max = (0, 1) + step = 1/100 + levels = np.arange(min,max+step,step) + CSa = plt.contourf(Z, levels, cmap=cmap_amp) + plt.clf() + plt.close() + ##################### + # mapping colormap onto fixed array of frequencyrange + step = 1/maxfreq + collist = cmap(np.arange(0, 1+step, step)) + ampstep = 1/200 + collist_amp = cmap_amp(np.arange(0, 1+ampstep, ampstep)) + collist_amp = collist_amp[100:]#[::-1] + print(collist[0], collist[-1], collist[140]) + + plt.rcParams['figure.figsize'] = 20,4.45 + ampax = plt.subplot(gs[1,:-1]) + #freqax = ampax.twinx() + freqax = plt.subplot(gs[0,:-1], sharex=ampax) + barax = plt.subplot(gs[1,-1]) + ampbarax = plt.subplot(gs[0,-1]) + avg_freq[ avg_freq == 0 ] = np.nan + avg_amp[ avg_amp == 0 ] = np.nan + # colorlist = np.zeros([len(avg_freq)]) + # valuecount = 0 + + # remove amp where freq is np.nan + # might actually not belong in the plotting section.. 
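+        # Illustration (a minimal sketch, not part of the original pipeline):
+        # the scatter colors in the loop below come from clamping each
+        # per-second frequency to maxfreq and using the result as an integer
+        # index into the precomputed color list, e.g. for one made-up value:
+        #
+        #   freq_value = 72.3                    # Hz, hypothetical example
+        #   idx = int(min(freq_value, maxfreq))  # clamp, then truncate
+        #   point_color = collist[idx]           # RGBA row of the colormap
+        #
+        # so every frequency between 0 and maxfreq Hz maps to a fixed color,
+        # independent of which class the point belongs to.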
+ #for f, a in zip(avg_freq, avg_amp): + # a[np.isnan(f)] = np.nan + + for f, a in zip(avg_freq, avg_amp): + myred='#d62728' + myorange='#ff7f0e' + mygreen='#2ca02c' + mylightgreen="#bcbd22" + mygray="#7f7f7f" + myblue='#1f77b4' + mylightblue="#17becf" + newlightblue = "#e1f7fd" + # getting the right color for each scatterpoint + fc = f[~np.isnan(f)] + #collist = np.append(np.array([collist[0,:]]*30),(collist[30:]), axis = 0) + fc[fc > maxfreq] = maxfreq + #fc[fc < coloroffset] = 0 + #collist = np.append(np.array([collist[0,:]]*coloroffset),(collist[coloroffset:]), axis = 0) + #col = [collist[v-coloroffset] if c >= coloroffset else collist[0] for v in fc if coloroffset <= v <= maxfreq] + col = [collist[int(v)] for v in fc] + ampcol = [collist_amp[int(v*100/2)] for v in a[~np.isnan(a)]] + # plotting + l1 = ampax.scatter(np.arange(0, len(a)*window_avg, window_avg) ,a, s = 1,label = 'amplitude', color = col)#colors[col], ls = ':') + l2 = freqax.scatter(np.arange(0,len(f)*window_freq,window_freq),f, s = 1, label = 'frequency', color = ampcol)#colors[col]) + # ls = l1+l2 + #labels = [l.get_label() for l in ls] + # ampax.legend(ls, labels, loc=0) + ampax.set_xlabel('Time [s]') + ampax.set_ylabel('amplitude of peaks') + freqax.set_ylabel('frequency of peaks') + freqbar =plt.colorbar(CS3, cax = barax) + ampbar = plt.colorbar(CSa, cax = ampbarax ) + freqbar.set_ticks([0,20,40,60,80,100,120]) + ampbar.set_ticks([0,0.2,0.4,0.6,0.8,1.0,1.2,1.4,1.8]) + ampbar.set_clim(-1,1) + freqax.set_xlim(0,len(a)*window_avg) + freqax.set_ylim(0,maxfreq) + ampax.set_xlim(0, len(a)*window_avg) + ampax.set_ylim(0,2) + plt.setp(freqax.get_xticklabels(), visible=False) + # remove last tick label for the second subplot + yticks = ampax.yaxis.get_major_ticks() + yticks[-1].label1.set_visible(False) + plt.subplots_adjust(hspace=.0) + print('plot', plot) + if plot == 1: + print('show plot') + plt.show() + if save == 1: + plt.savefig(filename[prefixlen:-4]+'_AmpFreq5.pdf') + else: + print('already saved figure, if you want to see the result start with plot == 1') + + +def bin_percentilediff(data, binlen): + data = np.array(data) + return np.percentile(data[:(data.size // binlen) * binlen].reshape(-1, binlen),95, axis=1) - np.percentile(data[:(data.size // binlen) * binlen].reshape(-1, binlen), 5 , axis=1) + +def bin_mean(data, binlen): + return np.mean(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) + # window_bigavg = 300 + # big_bin = [] + # for i in np.arange(0,len(avg_freq[0]),window_bigavg): # print('iiii?', i) + # collector = [] + # for f, a, col in zip(avg_freq, avg_amp, colorlist): + # for data in f[i//window_freq:(i+window_bigavg)//window_freq]: + # if data != 0 and not np.isnan(data): + # collector.append(data) + # print(collector) + # if len(collector) >100: + # big_bin.append(collector) + # for part in big_bin: + # print('i') + # plt.hist(part, bins = 250, range = (0,250)) + # plt.show() + + +def bin_ratio_std_mean(array, binlen): + #print( bin_array_std(array, binlen)/bin_array_mean(array,binlen) ) + mean, std, d2, d8 = bin_array_mean(array,binlen) + #print('mean, std, d2, d8', mean, std, d2, d8) + return mean * 2 > d8 > mean > d2 > mean * 0.5 + + +def bin_array_std(array, binlen): + bins = len(array)//binlen + stds = np.zeros((bins+1)) + #print(array[0: binlen]) + for i in range(len(stds)): + stds[i] = np.std(array[i*binlen: (i+1)*binlen]) + #print('stds0', stds[0], len(array)) + return stds + + +def bin_array_mean(array, binlen): + bins = len(array)//binlen +1 if len(array) % binlen != 0 else 
len(array)//binlen + means = np.zeros((bins)) + #print(array[0: binlen]) + stds = np.zeros((bins)) + d2 = np.zeros((bins)) + d8 = np.zeros((bins)) + for i in range(bins): + stds[i] = np.std(array[i*binlen: (i+1)*binlen]) + means[i] = np.median(array[i*binlen: (i+1)*binlen]) + d2[i] = np.percentile(array[i*binlen: (i+1)*binlen], 20) + d8[i] = np.percentile(array[i*binlen: (i+1)*binlen], 80) + + # means[i] = np.mean(array[i*binlen: (i+1)*binlen]) + #print('mean0',means[0], len(array)) + return means, stds, d2, d8 + + + + +def bin_ndarray(ndarray, new_shape, operation='sum'): + """ + Bins an ndarray in all axes based on the target shape, by summing or + averaging. + + Number of output dimensions must match number of input dimensions and + new axes must divide old ones. + + Example + ------- + >>> m = np.arange(0,100,1).reshape((10,10)) + >>> n = bin_ndarray(m, new_shape=(5,5), operation='sum') + >>> print(n) + + [[ 22 30 38 46 54] + [102 110 118 126 134] + [182 190 198 206 214] + [262 270 278 286 294] + [342 350 358 366 374]] + + """ + operation = operation.lower() + if not operation in ['sum', 'mean', 'std']: + raise ValueError("Operation not supported.") + if ndarray.ndim != len(new_shape): + raise ValueError("Shape mismatch: {} -> {}".format(ndarray.shape, + new_shape)) + compression_pairs = [(d, c//d) for d,c in zip(new_shape, + ndarray.shape)] + + #print(len(new_shape)) + flattened = [l for p in compression_pairs for l in p] + + ndarray = ndarray.reshape(len(flattened)) + for i in range(len(new_shape)): + op = getattr(ndarray, operation) + ndarray = op(-1*(i+1)) + return ndarray + + + + + + +def fill_hidden_3(fishes, eods, filename): + fishes = fishes + #print('hidden_calcing...') + nohidefishes = [] + for cl, fish in enumerate(fishes): + #print('Step1: Fish ', cl, ' ', cl, ' / ', len(fishes)) + #f = np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ "X.npmmp", dtype='float32', mode='w+', shape=(3,len(fish[0])*2), order = 'F') + f = np.zeros([3, len(fish[0])*2]) + fishisi = np.diff(fish[0]) + isi = fishisi[0] + lst_offst =0 + for i, newisi in enumerate(fishisi): + # print(cl, ' ..currently peak ', i, ' / ' , len(fishisi)) + newi = i+lst_offst + if newi > len(f[0])-1: # Errör + # print('Oh shit, nparray to small. 
doubling size') + f_new = np.empty([3,len(f[0])*2]) + f_new[:,:len(f[0])]=f + f = f_new + f[0][newi]=fish[0][i] + f[1][newi]=fish[1][i] + f[2][newi]=fish[2][i] + +# print(i, newi) + + + # print(cl, fish[0][i], isi, newisi) + if newisi > 2.8*isi: + guessx = fish[0][i] + isi + while guessx < fish[0][i] + newisi-0.8*isi: + peakx = peakaround3(guessx, isi*0.1, eods) + if peakx is not None: + newi = i+lst_offst + f[0][newi+1]=peakx + f[1][newi+1]=fish[1][i] + f[2][newi+1]=fish[2][i] + #print('estimated hidden peak: ', f[0][newi+1], f[2][newi+1]) + guessx = peakx + isi + (peakx-guessx) + lst_offst +=1 + #print('offset+1 at' ,i , peakx) + continue + break + isi = newisi + + + + nohidefishes.append(np.array([f[0,0:newi+1],f[1,0:newi+1],f[2,0:newi+1]])) + + + #print(x[0], x[200]) + return nohidefishes + + +def fill_hidden_Not(fishes, eods, filename): + fishes = fishes + #print('hidden_calcing...') + nohidefishes = [] + #for cl, fish in enumerate(fishes): + #print('Step1: Fish ', cl, ' ', cl, ' / ', len(fishes)) + #f = np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ "X.npmmp", dtype='float32', mode='w+', shape=(3,len(fish[0])*2), order = 'F') + return nohidefishes + +def noisedelete_smoothing(array, binlen, method, thr1, thr2): + if len(array) <= 2: + if np.mean(array) > 140: + for a in array: + a = np.nan + return array, np.arange(0, len(array), 1) + temp_classisi = array + if len(array) > 11: + smoothed = savgol_filter(temp_classisi, 11, 1) + else: smoothed = savgol_filter(temp_classisi, 3, 1) + diff = np.square(smoothed-temp_classisi) + data = np.array(diff) + #plt.plot(diff, color = 'green') + result = np.median(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) + result2 = bin_percentilediff(temp_classisi, binlen) + if method == 1: + result = result + elif method == 2: + result = result2 + if len(result) > 7: + smoothedresult = savgol_filter(result, 7, 1) + else: + smoothedresult = result + #plt.plot(np.arange(0,len(result)*binlen, binlen),result) + #plt.plot(smoothed) + #plt.plot(np.arange(0,len(result2)*20, 20), smoothedresult2) + #plt.plot(np.arange(0,len(result2)*20, 20), result2) + # plt.plot(temp_classisi, color = 'black') + # plt.plot(np.arange(0, len(result)*binlen, binlen),smoothedresult, 'red') + if method ==1 : + noiseindice = np.where(smoothedresult > thr1) + elif method == 2: + noiseindice = np.where(result > thr2)[0] + elif method == 3: + noiseindice = np.where(data > 1000) + print(noiseindice) + noiseindice = np.multiply(noiseindice, binlen) + print(noiseindice) + noiseindice = [x for i in noiseindice for x in range(i, i+binlen)] + print(np.diff(noiseindice)) + noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) + #print(noiseindice) + noiseindice = [x for arr in noiseindice if len(arr) > 1 for x in arr] + noiseindice= np.array(noiseindice) + #print(noiseindice) + array = np.array(array) + # Noise delete applial + if np.median(array) > 150: + noiseindice = np.arange(0, len(array), 1) + if len(noiseindice) > 0: + array[noiseindice] = np.nan + return array, noiseindice + +def noisedelete_lowpass(array,binlen): + origarray = array + if len(array) <= 5: + if np.mean(array) > 140 or np.mean(array) < 15: + for a in array: + a = np.nan + return array, [] #np.arange(0, len(array), 1) + array = np.array(array) + from scipy.signal import butter, lfilter + indice = [] + alldata = np.empty_like(array) + if len(array[np.isnan(array)]) > 0: + arrays = np.split(array, 
np.where(np.abs(np.diff(np.isnan(array))) == 1)[0]+1) + indice = np.where(np.abs(np.diff(np.isnan(array))) == 1)[0]+1 + indice = np.append(np.array([0]),indice) + else: + arrays = [array] + indice = [0] + for array,index in zip(arrays, indice): + if len(array) <2 or len(array[np.isnan(array)]) > 0: + alldata[index:index + len(array)] = array[:] + continue + print(array, 'array') + fs = 100 + cutoff = 25 + binlen = binlen + data = np.array(array, dtype = 'float64') + overlap = len(data)%binlen + if overlap > 0: + data = np.append(data, np.array([data[-1]]*(binlen-overlap)), axis = 0) + dataext = np.empty([data.shape[0]+20]) + dataext[:10]= data[0] + dataext[-10:] = data[-1] + dataext[10:-10]=data + B, A = butter(1, cutoff/ (fs / 2), btype = 'low') + #lpf_array = np.empty_like(dataext) + lpf_array= lfilter(B, A, dataext, axis = 0) + lpf_array = lfilter(B, A, lpf_array[::-1])[::-1] + lpf_binned_array = lpf_array[:(data.size // binlen) * binlen].reshape(-1, binlen) + lpf_array = lpf_array[10:-10] + if overlap > 0: + lpf_array[-(binlen-overlap):] = np.nan + data[-(binlen-overlap):] = np.nan + binned_array = data[:(data.size // binlen) * binlen].reshape(-1, binlen) + lpf_binned_array = lpf_array[:(data.size // binlen) * binlen].reshape(-1, binlen) + filterdiffs = np.empty([binned_array.shape[0]]) + #a = signal.firwin(1, cutoff = 0.3, window = "hamming") + for i, (bin_content, bin_filtered) in enumerate(zip(binned_array, lpf_binned_array)): + if i == binned_array.shape[0] - 1: + bin_content = bin_content[:-(binlen-overlap)] + bin_filtered = bin_filtered[:-(binlen-overlap)] + filterdiffs[i] = np.mean(np.square(np.subtract(bin_filtered[~np.isnan(bin_filtered)], bin_content[~np.isnan(bin_content)]))) + # filterdiff = filterdiff / len(bin_content) + print(filterdiffs) + binned_array[filterdiffs > 1, :] = np.nan + if overlap > 0: + data = binned_array.flatten()[:-(binlen-overlap)] + else: + data = binned_array.flatten() + print(data, 'data') + alldata[index:index + len(data)] = data + # twin[np.isnan(data)] = np.nan + # plt.plot(alldata, color = 'red') + # plt.plot(np.add(origarray, 2), color = 'blue') + # plt.ylim(0, 150) + # plt.show() + return alldata, [] + + # noiseindice = np.multiply(noiseindice, binlen) + # print(noiseindice) + # noiseindice = [x for i in noiseindice for x in range(i, i+binlen)] + # print(np.diff(noiseindice)) + # noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) + + # #print(noiseindice) + # noiseindice = [x for arr in noiseindice if len(arr) > 1 for x in arr] + # noiseindice= np.array(noiseindice) + # #print(noiseindice) + # array = np.array(array) + # # Noise delete applial + # if np.median(array) > 150: + # noiseindice = np.arange(0, len(array), 1) + # if len(noiseindice) > 0: + # array[noiseindice] = np.nan + # return array, noiseindice + + +def peakaround3(guessx, interval, eods): + pksinintv = eods[0][ ((guessx-interval < eods[0]) & (eods[0] < guessx+interval))] + if len(pksinintv)>0: + return(pksinintv[0]) + elif len(pksinintv) >1: + pksinintv = pksinintv[np.argmin(abs(pksinintv - guessx))] + return(pksinintv) ## might be bad, not tested + # for px in fish[0]: + # distold = interval + # if px < guessx-interval: + # continue + # # print('in area', guessx-interval) + # if guessx-interval < px < guessx+interval: + # found = True + # dist = px-guessx + # if abs(dist) < abs(distold): + # distold = dist + # if px > guessx+interval: + # + # if found == True: + # print(guessx, dist) + # 
time.sleep(5) + # return guessx + dist + # + # else: + # + # break + return None + + + +def fill_holes(fishes): #returns peakx, peaky, peakheight # Fills holes that seem to be missed peaks in peakarray with fake (X/Y/height)-Peaks + retur = [] + lost = [] + + #print('fill_holes fishes', fishes) + + for cl, fish in enumerate(fishes): + #print('Step2: Fish', cl) + fishisi = np.diff(fish[0]) + mark = np.zeros_like(fishisi) + isi = 0 + #print('mark', mark) + # print('fishisi' , fishisi) + #find zigzag: + c=0 + c0= 0 + n=0 + for i, newisi in enumerate(fishisi): + # print(newisi, isi) + if abs(newisi - isi)>0.15*isi: ## ZigZag-Detection : actually peaks of two classes in one class - leads to overlapping frequencys which shows in a zigzag pattern + if (newisi > isi) != (fishisi[i-1] > isi): + c+=1 + # print(abs(newisi - isi), 'x = ', fish[i].x) + c0+=1 + elif c > 0: + n += 1 + if n == 6: + if c > 6: + # print ('zigzag x = ', fish['x'][i-6-c0], fish['x'][i-6]) + mark[i-6-c0:i-6]= -5 + c = 0 + c0=0 + n = 0 + + #if c > 0: + # print(i, c) + # if c == 6: + # print('zigzag!') + isi = newisi + isi = 0 + for i, newisi in enumerate(fishisi): ## fill holes of up to 3 Peaks # Changed to: Only up to 1 Peak because : Holes might be intended for communicational reasons + #print('mark: ' , mark) + if mark[i] == -5: continue + if i+2 >= len(fishisi): + continue + if (2.2*isi > newisi > 1.8*isi) and (1.5*isi>fishisi[i+1] > 0.5*isi) : + mark[i] = 1 + isi = newisi + # print('found 1!' , i) + elif (2.2*isi > newisi > 1.8*isi) and (2.2*isi> fishisi[i+1] > 1.8*isi) and (1.5*isi > fishisi[i+2] > 0.5*isi): + mark[i] = 1 + isi = isi + #elif 3.4*isi > newisi > 2.6*isi and 1.5*isi > fishisi[i+1] > 0.5*isi: + # mark[i] = 2 + + elif (0.6* isi > newisi > 0): + # print('-1 found', i ) + if mark[i] ==0 and mark[i+1] ==0 and mark[i-1]==0 : + # isi newisi + # continue + # print('was not already set') + if fishisi[i-2] > isi < fishisi[i+1]: + mark[i] = -1 + # print('-1') + elif isi > fishisi[i+1] < fishisi[i+2]: + mark[i+1] = -1 + # print('-1') + isi = newisi + x = [] + y = [] + h = [] + x_lost=[] + y_lost=[] + h_lost=[] + # print('filledmarks: ', mark) + for i, m in enumerate(mark): + if m == -1 : + # print('-1 at x = ', fish['x'][i]) + continue + if m == -5: + x_lost.append(fish[0][i]) + y_lost.append(fish[1][i]) + h_lost.append(fish[2][i]) + x.append(fish[0][i]) + y.append(fish[1][i]) + h.append(fish[2][i]) + continue + x.append(fish[0][i]) + y.append(fish[1][i]) + h.append(fish[2][i]) + if m == 1: + # print('hofly added peak at x = ' , fish['x'][i]) + x.append(fish[0][i] + fishisi[i-1]) + y.append( 0.5*(fish[1][i]+fish[1][i+1])) + h.append(0.5*(fish[2][i]+fish[2][i+1])) + elif m== 2: + x.append(fish[0][i] + fishisi[i]) + y.append( 0.5*(fish[1][i]+fish[1][i+1])) + h.append(0.5*(fish[2][i]+fish[2][i+2])) + x.append(fish[0][i] + 2*fishisi[i-1]) + y.append( 0.5*(fish[1][i]+fish[1][i+2])) + h.append(0.5*(fish[2][i]+fish[2][i+2])) + # print('added at x = ', fish[0][i] + fishisi[i]) + x = np.array(x) + y= np.array(y) + h = np.array(h) + x_lost = np.array(x_lost) + y_lost = np.array(y_lost) + h_lost = np.array(h_lost) + #print('retur', x, y, h) + retur.append([x,y,h]) + lost.append([x_lost,y_lost,h_lost]) + # filledpeaks =np.array(filledpeaks) + # print(filledpeaks.shape) + # filledpeaks. 
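+    # Worked example of the marking scheme above (illustrative numbers only):
+    # with a running interval of ~100 samples, the interval sequence
+    #   100, 100, 205, 100, ...
+    # marks the 205-interval with 1, so one synthetic peak is appended at
+    # x[i] + previous interval, with y and height averaged from the two
+    # neighbouring peaks; a stray extra peak that splits one ~100-sample
+    # interval into, say, 30 + 70 samples is marked with -1 and dropped,
+    # which restores the original interval; a longer zigzag run of
+    # alternating long/short intervals (two classes merged into one) is
+    # marked with -5, and those peaks stay in the output but are also
+    # copied into the returned `lost` list.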
+ + return retur, lost + + +# eods[-len(thisblock_eods[:,]):] = thisblock_eods +# eods = np.memmap("eods_"+filename[:-3]+"npy", dtype='float32', mode='r+', shape=(4,eods_len)) + #fp = np.memmap(filepath[:-len(filename)]+"eods_"+filename[:-3]+"npy", dtype='float32', mode='r+', shape=(4,len(thisblock_eods[:,]))) + #nix print( b.data_arrays) + # for cl in np.unique(cllist): + # currentfish_x = x[:][cllist == cl] + # currentfish_y = y[:][cllist == cl] + # currentfish_h d= x[:][cllist == cl] + #nix try: + #nix xpositions[cl] = b.create_data_array("f%d_eods" %cl, "spiketimes", data = currentfish_x) + #nix xpositions[cl].append_set_dimension() + #nix # thisfish_eods = b.create_multi_tag("f%d_eods_x"%cl, "eods.position", xpositions[cl]) + #nix # thisfish_eods.references.append(nixdata) + #nix except nix.pycore.exceptions.exceptions.DuplicateName: + #nix + #nix xpositions[cl].append(currentfish_x) + + + #thisfish_eods.create_feature(y, nix.LinkType.Indexed) + #b.create_multi_tag("f%d_eods_y"%cl, "eods.y", positions = y) + #b.create_multi_tag("f%d_eods_h"%cl, "eods.amplitude", positions = h) + #thisfish_eods.create_feature + + #nix file.close() + # Save Data + # Needed: + # Meta: Starttime, Startdate, Length + # x, y, h, cl, difftonextinclass -> freq ? , + + # Later: Find "Nofish" + # Find "Twofish" + # Find "BadData" + # Find "Freqpeak" + # ? Find "Amppeak" + # + + # bigblock = np.array(bigblock) + # x=xarray(bigblock) + # y=yarray(bigblock) + # cl=clarray(bigblock) + + + #nix file = nix.File.open(file_name, nix.FileMode.ReadWrite) + #nix b = file.blocks[0] + #nix nixdata = b.data_arrays[0] + #nix cldata = [] + #nix print(classes) + #nix print(b.data_arrays) + #nix for i in range(len(np.unique(classes))): + #nix cldata.append(b.data_arrays[i+1]) + + + # for cl in + + # for cl in + # x = thisfish_eods + + + #nix file.close() + + + +def reduce_classes(npFishes): + offtimeclasses = [] + for i, fish in enumerate(npFishes): + fish = np.array(fish) + #print(fish[0]) + # print('nüFishes before and after command') + # print('bef', npFishes[i][0][0]) + # print(fish[:,:][:,np.where(~np.isnan(fish[0]))].reshape(4,-1)) + npFishes[i] = fish[:,:][:,np.where(~np.isnan(fish[0]))][:,0] + # print('after', npFishes[i][0][0]) + if len(npFishes[i][0]) <= 100: + offtimeclasses.append(i) + #print('delete class ', i) + #print('Len offtime vs len Fishes', len(offtimeclasses), len(npFishes)) + for index in sorted(offtimeclasses, reverse=True): + del npFishes[index] + #print('npFishes to check features', npFishes[0][3]) + srt_beg = sort_beginning(npFishes) + # print(len(npFishes[0])) + # print(len(srt_beg)) + #srt_end = sort_ending(npFishes) + if len(srt_beg) >= 1: + reduced = [] + reduced.append(srt_beg[0]) + #for i, fish in enumerate(srt_beg): + #print(len(srt_beg)) + #print('reducing classes') + for i in range(1, len(srt_beg)): + #print('.', end = '') + cl = 0 + reducedlen_beg = len(reduced) + while cl < reducedlen_beg: + cond1 = reduced[cl][0][-1] < srt_beg[i][0][0] + cond2 = False + nxt=i+1 + while nxt < len(srt_beg) and srt_beg[i][0][-1] > srt_beg[nxt][0][0]: #part ends after another part started (possibly in the other part. 
+ if len(srt_beg[nxt][0]) > len(srt_beg[i][0]):# -> lencheck to pick longer part) + reduced.append(srt_beg[i]) + # print('case1') + break + nxt+=1 + else: + cond2 = True + # print('lenreduced', len(reduced), len(srt_beg)) + #print(i, cl, cond1, cond2 ) + if cond1 and cond2: + #print(reduced[cl].shape, srt_beg[i].shape) + reduced[cl] = np.concatenate((reduced[cl],srt_beg[i]), axis=1) + #print(len(reduced[cl][0]), len(srt_beg[i][0])) + cl+=1 + break + if cond2 == False: + break + cl+=1 + else: + reduced.append(srt_beg[i]) + + #print('len red', len(reduced)) + #print(len(npFishes[0])) + return reduced + else: + return [] + +def sort_beginning(npFishes): + srted = npFishes + srted.sort(key=lambda x: x[0][0]) + #for i in srted[0][0]: + # print(i) + + return srted + +def sort_ending(npFishes): + srted = npFishes[:] + srted.sort(key=lambda x: x[0][-1]) + return srted + +def noisedscrd(fishes): + for fish in fishes: + print(np.std(fish[2])) + + +def plot_ontimes(ontime): + plt.fill_between(range(len(ontime[0])), ontime[0], color = '#1e2c3c', label = 'close') #'#324A64' + plt.fill_between(range(len(ontime[1])), ontime[1], color = '#324A64', label = 'nearby') + plt.fill_between(range(len(ontime[2])), ontime[2], color = '#8ea0b4', label = 'far') + plt.xlabel('seconds') + plt.ylabel('position') + plt.legend(loc = 1) + plt.ylim(0,1.5) + # plt.xlim(0,len()) + plt.show() + + + +if __name__ == '__main__': + main() diff --git a/thunderfish/DextersThunderfishAddition/analyzeEods_lowpass.py b/thunderfish/DextersThunderfishAddition/analyzeEods_lowpass.py new file mode 100644 index 00000000..41d8d039 --- /dev/null +++ b/thunderfish/DextersThunderfishAddition/analyzeEods_lowpass.py @@ -0,0 +1,1130 @@ +import sys +import numpy as np +import copy +from scipy.stats import gmean +from scipy import signal +from scipy import optimize +import matplotlib.pyplot as plt +import matplotlib.colors as mplcolors +from thunderfish.dataloader import open_data +from thunderfish.peakdetection import detect_peaks +from scipy.interpolate import interp1d +from scipy.signal import savgol_filter +from collections import deque +import nixio as nix +import time +import os +import pickle + + +deltat = 60.0 # seconds of buffer size +thresh = 0.05 +mind = 0.1 # minimum distance between peaks +peakwidththresh = 30 # maximum distance between max(peak) and min(trough) of a peak, in datapoints +new = 0 + +def main(): ############################################################# Get arguments eodsfilepath, plot, (opt)save, (opt)new + + filepath = sys.argv[1] + sys.argv = sys.argv[1:] + + + plot = 0 + save = 0 + print(sys.argv) + if len(sys.argv)==2: + plot = int(sys.argv[1]) + print(plot) + if len(sys.argv)==3: + plot = int(sys.argv[1]) + save = int(sys.argv[2]) + print('saving results: ', save) + import ntpath + if len(sys.argv)==4: + plot = int(sys.argv[1]) + save = int(sys.argv[2]) + new = int(sys.argv[3]) + print('saving results: ', save) + ntpath.basename("a/b/c") + def path_leaf(path): + head, tail = ntpath.split(path) + return tail or ntpath.basename(head) + filename = path_leaf(filepath) + prefixlen = filename.find('_')+1 + starttime = "2000" + home = os.path.expanduser('~') + path = filename[prefixlen:-4]+"/" + os.chdir(home+'/'+path) # operating in directory home/audiofilename/ + if os.path.exists(filename[prefixlen:-4]+"_freqs2_lp.npy"): + if os.path.getsize(filename[prefixlen:-4]+"_freqs2_lp.npy") > 0: + print('exists already') + quit() + else: + print('found a 0!') + + # if 
os.path.exists(filename[prefixlen:-4]+'_AmpFreq5_lp.pdf'): + # new = 0 + + with open_data(filename[prefixlen:-4]+".WAV", 0, 60, 0.0, 0.0) as data: + samplerate = data.samplerate + datalen = len(data) + + ############################################################# Fileimport and analyze; or skip, if analyzed data already exists + if new == 1 or not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes.npz"): + print('new analyse') + eods = np.load(filename, mmap_mode='c') + + # time1 = 40000 + # time2 = 45000 + # time1x = time1 * samplerate + # time2x = time2 * samplerate + # startpeak = np.where(((eods[0]>time1x)&(eods[0]= 100 and i % (classamount//100) == 0: + print(i) + fishclass = eods[:,:][: , classlist == num] + fish = [] + if len(fishclass[0]) < 12: + continue + for i , feature in enumerate(fishclass): + if i != 3: + fish.append(feature) +# print('fish - printing to check structure', fish) + temp_classisi = np.diff(fishclass[0]) + #print(temp_classisi) + #print('plot smooth vs orig', len(temp_classisi)) + binlen=10 + temp_classisi_medians = temp_classisi#bin_median(temp_classisi, 1) + smoothed = savgol_filter(temp_classisi_medians,11,1) + diff = np.square(smoothed-temp_classisi_medians) + data = np.array(diff) + result = np.median(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) + result2 = bin_percentilediff(temp_classisi, 20) + if len(result) > 7 and len(result2) > 7: + smoothedresult = savgol_filter(result, 7, 1) + smoothedresult2 = savgol_filter(result2, 7, 1) + else: + smoothedresult = result + smoothedresult2 = result2 + #plt.plot(np.arange(0,len(result)*binlen, binlen),result) + #plt.plot(smoothed) + #plt.plot(np.arange(0,len(result2)*20, 20), smoothedresult2) + #plt.plot(np.arange(0,len(result2)*20, 20), result2) + # plt.plot(temp_classisi_medians) + #plt.plot(np.arange(0, len(smoothedresult)*binlen, binlen),smoothedresult) + noiseindice = np.where(smoothedresult > 100000) + #print(noiseindice) + noiseindice = np.multiply(noiseindice, binlen) + #print(noiseindice) + noiseindice = [x for i in noiseindice[0] for x in range(i, i+10)] + print(np.diff(noiseindice)) + noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) + #print(noiseindice) + noiseindice = [x for arr in noiseindice if len(arr) > 20 for x in arr[50:-51]] + noiseindice= np.array(noiseindice) + #print(noiseindice) + fish = np.array(fish) + # Noise delete applial + # if len(noiseindice) >0 : + # fish[:,noiseindice] = np.nan #np.setdiff1d(np.arange(0, len(fish[0]),1),(noiseindice))] = np.nan + fish = list(fish) + #plt.plot(temp_classisi) + # plt.show() + binlen = 60 + #print(len(fish[0])) + if discardcondition1(fish) == False: # condition length < 10 + # if False: + mean, std, d2, d8 = bin_array_mean(temp_classisi,binlen) + # print('mean, std, d2, d8', mean, std, d2, d8) + count = ((mean * 4 >= d8) * (d2 >= mean * 0.25)) .sum() # condition_2 : if 0.2, and 0.8 deciles of the ISI of ONE SECOND/binlen are in the area of the median by a factor of 2, then the class seems to have not too much variability. + # Problem: Case, Frequency changes rapidly during one second/binlen , then the 0.8 or 0.2 will be out of the area... 
+ # But then there is one wrong estimation, not too much of a problem + #print('fish') + # if count >= 0.5*(len(temp_classisi)//binlen +1): + if True: + fishes.append(fish) + #print('len fishes after append', len(fishes)) + #print('printing fishes to check structure', fishes[0][0]) + #ontimes = np.load('ontime'+filename[prefixlen:-4]+'.npz') + #ontime = [] + ## for c, items in enumerate(ontimes.items()): + ## ontime.append(items[1]) + #ontime.append(ontimes['on']) + #ontime.append(ontimes['near']) + #ontime.append(ontimes['far']) + + #if plot == 1: + # plot_ontimes(ontime) + + #print(eods[0][-1]//samplerate, len(ontime[0])) + if fishes is not None: + + #for fish in fishes: + # fish[0] + + # improving the fishpeak-data by adding peaks at places where theses peaks are hidden behind other (stronger)peaks + #fishes = fill_hidden_3(fishes, eods, filename) # cl-dict : x y z -dict + # filling holes or removing unexpected peaks from the class which are most likely caused by false classification + #fishes, weirdparts = fill_holes(fishes) + #fishes, weirdparts = fill_holes(fishes) + + if fishes is not None: + if len(fishes) > 0: + for cl, fish in enumerate(fishes): + ### Filter to only get ontimes close and nearby + for i, x in enumerate(fish[0]): + print(x) + #if x//samplerate < len(ontime[0]): +# # print(ontime[1][x//samplerate], ontime[0][x//samplerate]) + # if ontime[0][x//samplerate] != 1 and ontime[1][x//samplerate] != 1 and ontime[2][x//samplerate] != 1: + # for feat_i, feature in enumerate(fish): + # fishes[cl][feat_i][i] = np.nan + # print(x//samplerate, ' ignored') + isi = [isi for isi in np.diff(fishes[cl][0])] + isi.append(isi[-1]) + fishes[cl].append(isi) + #fishes[i] # the structure of the array fishes + # 0 x + # 1 y + # 2 h + # 3 isi + npFishes = fishes + + + # fishfeaturecount = len(fishes[cl]) + # for cl in range(len(np.unique(classlist))-1): + # + # fishlen = len(fishes[cl][0]) + # npFishes[cl]= np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ ".npmmp", dtype='float32', mode='w+', shape=(fishfeaturecount, fishlen), order = 'F') + # np.zeros([fishfeaturecount, len(fishes[cl]['x'])]) + # for i, feature in enumerate(['x', 'y', 'h', 'isi']): #enumerate(fishes[cl]): + # if feature == 'isi': + # fishes[cl][feature].append(fishes[cl][feature][-1]) + # npFishes[cl][i] = np.array(fishes[cl][feature]) + # + +# np.set_printoptions(threshold=np.nan) + # + if save == 1 and not os.path.exists('classes/'): + os.makedirs('classes/') + + #np.save('classes/'+ filename[prefixlen:-4]+"_class%d"%i, fish) + #print('this', len(npFishes)) + if save == 1: + with open('classes/'+ filename[prefixlen:-4]+"_classes.lst", "wb") as fp: #Pickling + pickle.dump(npFishes, fp) + #np.savez('classes/'+ filename[prefixlen:-4]+"_classes", npFishes) + else: + npFishes = [] + try: + with open('classes/'+ filename[prefixlen:-4]+"_classes.lst", "rb") as fp: #Pickling + npFishes = pickle.load(fp) + # npFishload=np.load('classes/'+ filename[prefixlen:-4]+"_classes.npz") + print('loaded classes') + except: + print('no classes found') + # for fishes in npFishload.files: + # print('loaded ', fishes) + # for fish in npFishload[fishes]: + # fishtemp = np.zeros([4,len(fish[0])]) + # for i, fishfeature in enumerate(fish): + # fishtemp[i] = fishfeature + # npFishes.append(fishtemp) + #print('npFishes to check structure', npFishes[0][0][0]) +# if not os.path.exists('classes/'): +# os.makedirs('classes/') +# if not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes_red"): +#np.save('classes/'+ 
filename[prefixlen:-4]+"_class%d"%i, fish) + if new == 1 or not os.path.exists('classes/'+ filename[prefixlen:-4]+"_classes_red.lst"): +# reducednpFishes = npFishes + reducednpFishes = reduce_classes(npFishes)# reducing classes by putting not overlapping classes together + #print('reduced') + if save == 1: + with open('classes/'+ filename[prefixlen:-4]+"_classes_red.lst", "wb") as fp: #Pickling + pickle.dump(reducednpFishes, fp) + #np.savez('classes/'+ filename[prefixlen:-4]+"_classes_red.npz", reducednpFishes) + else: + with open('classes/'+ filename[prefixlen:-4]+"_classes_red.lst", "rb") as fp: #Pickling + reducednpFishes = pickle.load(fp) + #print('len reduced ', len(reducednpFishes)) + if len(reducednpFishes) == 0: + print('no on-/ or nearbytimeclass with sufficient length or good enough data. quitting') + quit() +# reducednpFishload=np.load('classes/'+ filename[prefixlen:-4]+"_classes_red.npz") +# +# for fishes in reducednpFishload.files: +# print('loaded reduced classes') +# for fish in reducednpFishload[fishes]: +# fishtemp = np.zeros([4,len(fish[0])]) +# for i, fishfeature in enumerate(fish): +# fishtemp[i] = fishfeature +# reducednpFishes.append(fishtemp) +# +# for i, rfish in enumerate(reducednpFishes): +# if not os.path.exists('classes/'): +# os.makedirs('classes/') +# np.save('classes/'+ filename[prefixlen:-4]+"_class%d_reduced"%i, rfish) + #print('reducednpFishes to check structure', reducednpFishes[0][3]) + + + + window_freq = 1 + freqavgsecpath = filename[prefixlen:-4]+"_freqs2_lp.npy" + if new == 1 or not os.path.exists(freqavgsecpath): + print('new freq calcing') + avg_freq = np.zeros([len(reducednpFishes),datalen//int((samplerate*window_freq))+1]) + avg_isi = np.zeros([len(reducednpFishes),datalen//int(samplerate*window_freq)+1]) + for i, fish in enumerate(reducednpFishes): + if len(fish[0]) >= 5: + fish = np.array(fish) + avg_freqs_temp = [] + avg_isi_temp = [] + peak_ind = 0 + sec = 0 + for secx in np.arange(fish[0][0],fish[0][-1], samplerate*window_freq): + #count_peaks_in_second = ((secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)).sum() + # isimean_peaks_in_second = fish[3][(secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)].mean() # # # # # # # # # Using median instead of mean. Thus, hopefully overgoing outlier-isis, which are due to Peaks hidden beneath stronger Peaks of another fish. 
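+                    # The estimate used here, reduced to a minimal sketch (the
+                    # helper name is hypothetical, it is not defined in this file):
+                    #
+                    #   def bin_frequency(peak_x, isis, samplerate, t0, t1):
+                    #       """Median-ISI frequency (Hz) of the peaks inside (t0, t1)."""
+                    #       sel = isis[(t0 < peak_x) & (peak_x < t1)]
+                    #       return samplerate / np.median(sel) if len(sel) else np.nan
+                    #
+                    # i.e. one robust frequency value per one-second window that is
+                    # insensitive to single outlier intervals.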
+ #freq_in_bin = samplerate/isimean_peaks_in_second + sec_peaks = fish[3][(secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)] + #sec_freq = np.divide(samplerate,sec_peaks) + print(sec_peaks) + if len(sec_peaks) > 0: + perctop, percbot = np.percentile(sec_peaks, [45, 55]) + #peakisi_in_bin = sec_peaks[(perctop>=sec_peaks)&(sec_peaks>=percbot)].mean() + #print(perctop, percbot, peaks_in_bin) + #isimean_peaks_in_bin = sec_peaks[(perctop >=sec_peaks)&(sec_peaks>=percbot)].mean() + isimean_peaks_in_bin = np.median(sec_peaks) + freq_in_bin = samplerate/isimean_peaks_in_bin + else: freq_in_bin = np.nan + ################################################################################################################################### TODO + #isimean_peaks_in_bin = np.median(fish[3][(secx < fish[0]) & (fish[0] < secx+samplerate*window_freq)]) + print(freq_in_bin) + #freq_in_bin = count_peaks_in_second + if 5 < freq_in_bin < 140: + avg_freqs_temp.append(freq_in_bin) + else: + avg_freqs_temp.append(np.nan) + sec+=1 + #print(sec, freq_in_bin) + # avg_freqs_temp, noiseindice = noisedelete_smoothing(avg_freqs_temp, 3, 2, 100000, 1000) + avg_freqs_temp, noiseindice = noisedelete_lowpass(avg_freqs_temp, binlen= 10) + avg_freq[i, int(fish[0][0])//int(samplerate*window_freq) : int(fish[0][0])//int(samplerate*window_freq)+sec] = np.array(avg_freqs_temp) + #plt.show() + + if save == 1: + np.save(freqavgsecpath, avg_freq) + else: + avg_freq = np.load(freqavgsecpath) + print('loaded freqs') + #for i in avg_isi_fish: + # print('avg_freqs_byisi') + # plt.plot(i) + #plt.xlabel('seconds') + #plt.ylabel('isi of peaks') + #plt.show() + # cmap = plt.get_cmap('jet') + # colors =cmap(np.linspace(0, 1.0, 3000)) #len(np.unique(classlist)))) + # np.random.seed(22) + # np.random.shuffle(colors) + # colors = [colors[cl] for cl in range(len(avg_freq_fish))] + # for i, col in zip(avg_freq_fish, colors): + # print('avg_freqs', 'len:' ,len(avg_freq_fish)) + # plt.plot(i, color = col) + # plt.xlabel('seconds') + # plt.ylabel('frequency of peaks') + # plt.show() + ## #print(avg_freqs[0]) + + + window_avg = 1 + ampavgsecpath = filename[prefixlen:-4]+'_amps2_lp.npy' + #freqtime = np.arange(0, len(data), samplerate) + if new == 1 or not os.path.exists(ampavgsecpath): + avg_amps_temp = [] + peak_ind = 0 + + avg_amp = np.zeros([len(reducednpFishes),int(datalen)//int(samplerate*window_avg)+1]) + #avg_amp_fish = np.memmap(ampavgsecpath, dtype='float32', mode='w+', shape=(len(reducednpFishes),datalen//samplerate+1)) + + for i, fish in enumerate(reducednpFishes): + if len(fish[0]) >= 100: + #print('amp, ', i, '/', len(reducednpFishes)) + step = 0 + avg_amps_temp = [] + for secx in np.arange(fish[0][0],fish[0][-1], samplerate*window_avg): + amp_in_second = fish[2][(secx < fish[0]) & (fish[0] < secx+samplerate*window_avg)].mean() + # print(i, peak_ind, amp_in_second) + avg_amps_temp.append(amp_in_second) + step+=1 + #print('avg_amps_temp', avg_amps_temp) + #avg_amps = np.memmap(ampavgsecpath, dtype='float32', mode='w+', shape=(len(avg_amps_temp), )) + #avg_amps[:] = avg_amps_temp + + avg_amps_temp = np.array(avg_amps_temp) + avg_amps_temp[np.where(np.isnan(avg_amps_temp))] = 0.0 + avg_amp[i, int(fish[0][0])//int(samplerate*window_avg) : int(fish[0][0])//int(samplerate*window_avg)+step] = avg_amps_temp + + if save == 1: + np.save(ampavgsecpath, avg_amp) +# np.save(ampavgsecpath, avg_amp_fish) + # print('avg_amps ',avg_amps) + #avg_freqs.append(np.mean(eods_freq[i:i+samplerate])) + else: + #avg_amps = np.memmap(ampavgsecpath, 
dtype='float32', mode='r', shape=(data//samplerate)) + avg_amp = np.load(ampavgsecpath) + print('loaded amp') + + if new == 1 or plot == 1 or save == 1: + # Plotting ####################################################################################################################### + ################################################################################################################################## + + plt.style.use('../PaperWholeFig.mplstyle') + import matplotlib.gridspec as gridspec + gs = gridspec.GridSpec(2, 2, height_ratios=(1, 1), width_ratios=(1, 0.02), wspace = 0.03) + + # Tuning colors + maxfreq = 140 + coloroffset = 5 + # Colorbar Choice + cmap = plt.get_cmap('magma')#'gist_rainbow') + cmap_amp = plt.get_cmap('Blues')#'gist_rainbow') + # Colorbar Workaround + Z = [[0,0],[0,0]] + min, max = (0, maxfreq) + step = 1 + levels = np.arange(min,max+step,step) + CS3 = plt.contourf(Z, levels, cmap=cmap) + plt.clf() + plt.close() + ##################### + # Colorbar Workaround + Z = [[0,0],[0,0]] + min, max = (0, 1) + step = 1/100 + levels = np.arange(min,max+step,step) + CSa = plt.contourf(Z, levels, cmap=cmap_amp) + plt.clf() + plt.close() + ##################### + # mapping colormap onto fixed array of frequencyrange + step = 1/maxfreq + collist = cmap(np.arange(0, 1+step, step)) + ampstep = 1/200 + collist_amp = cmap_amp(np.arange(0, 1+ampstep, ampstep)) + collist_amp = collist_amp[100:]#[::-1] + print(collist[0], collist[-1], collist[140]) + + # plt.rcParams['figure.figsize'] = 20,4.45 + + ampax = plt.subplot(gs[1,:-1]) + #freqax = ampax.twinx() + freqax = plt.subplot(gs[0,:-1], sharex=ampax) + barax = plt.subplot(gs[1,-1]) + ampbarax = plt.subplot(gs[0,-1]) + avg_freq[ avg_freq == 0 ] = np.nan + avg_amp[ avg_amp == 0 ] = np.nan + # colorlist = np.zeros([len(avg_freq)]) + # valuecount = 0 + + # remove amp where freq is np.nan + # might actually not belong in the plotting section.. 
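+        # Masking step below: wherever a class has no frequency estimate in a window,
+        # the corresponding amplitude sample is blanked too, so both scatter plots only
+        # show windows with a valid EOD detection. A minimal example of the idea
+        # (with made-up numbers):
+        #     f = np.array([np.nan, 80.0, 82.0])
+        #     a = np.array([0.40, 0.45, 0.50])
+        #     a[np.isnan(f)] = np.nan      # a becomes [nan, 0.45, 0.50]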
+ for f, a in zip(avg_freq, avg_amp): + a[np.isnan(f)] = np.nan + + for f, a in zip(avg_freq, avg_amp): + myred='#d62728' + myorange='#ff7f0e' + mygreen='#2ca02c' + mylightgreen="#bcbd22" + mygray="#7f7f7f" + myblue='#1f77b4' + mylightblue="#17becf" + newlightblue = "#e1f7fd" + # getting the right color for each scatterpoint + fc = f[~np.isnan(f)] + #collist = np.append(np.array([collist[0,:]]*30),(collist[30:]), axis = 0) + fc[fc > maxfreq] = maxfreq + fc[fc < coloroffset] = np.nan + #collist = np.append(np.array([collist[0,:]]*coloroffset),(collist[coloroffset:]), axis = 0) + #col = [collist[v-coloroffset] if c >= coloroffset else collist[0] for v in fc if coloroffset <= v <= maxfreq] + for v in fc: + print(v) + col = [collist[int(v)] for v in fc[~np.isnan(fc)]] + ampcol = [collist_amp[int(v*100/2)] for v in a[~np.isnan(a)]] + # plotting + l1 = ampax.scatter(np.arange(0, len(a)*window_avg, window_avg) ,a, s = 1,label = 'amplitude', color = col)#colors[col], ls = ':') + l2 = freqax.scatter(np.arange(0,len(f)*window_freq,window_freq),f, s = 1, label = 'frequency', color = ampcol)#colors[col]) + # ls = l1+l2 + #labels = [l.get_label() for l in ls] + # ampax.legend(ls, labels, loc=0) + ampax.set_xlabel('Time [s]') + ampax.set_ylabel('Amplitude') + freqax.set_ylabel('Frequency') + freqbar = plt.colorbar(CS3, cax = barax) + ampbar = plt.colorbar(CSa, cax = ampbarax) + ampbar.set_clim(-0.3,1) + freqbar.set_ticks([0,30,60,90,120]) + ampbar.set_ticks(np.arange(0,1,0.249)) + ampbar.set_ticklabels([0,0.5,1,1.5,2]) + freqax.set_xlim(0,len(a)*window_avg) + freqax.set_ylim(0,maxfreq) + ampax.set_ylim(0, 2) + ampax.set_xlim(0, len(a)*window_avg) + plt.setp(freqax.get_xticklabels(), visible=False) + # remove last tick label for the second subplot + yticks = ampax.yaxis.get_major_ticks() + yticks[-1].label1.set_visible(False) + plt.subplots_adjust(hspace=.0) + freqax.get_yaxis().set_label_coords(-0.05, 0.5) + ampax.get_yaxis().set_label_coords(-0.05, 0.5) + plt.gcf().subplots_adjust(bottom=0.15) + ampbar.ax.get_yaxis().set_label_coords(4,0.5) + ampbar.ax.set_ylabel('Amplitude', rotation=270) + freqbar.ax.get_yaxis().set_label_coords(4,0.5) + freqbar.ax.set_ylabel('Frequency', rotation=270) + + + # freqax.set_xlim(20600,21800) + # ampax.set_xlim(20600, 21800) + # freqax.set_ylim(80,125) + + print('plot', plot) + if plot == 1: + print('show plot') + plt.show() + if save == 1: + plt.savefig(filename[prefixlen:-4]+'_AmpFreq7_lp.pdf') + print('saved') + plt.clf() + else: + print('already saved figure, if you want to see the result start with plot == 1') + + +def bin_percentilediff(data, binlen): + data = np.array(data) + return np.percentile(data[:(data.size // binlen) * binlen].reshape(-1, binlen),95, axis=1) - np.percentile(data[:(data.size // binlen) * binlen].reshape(-1, binlen), 5 , axis=1) + +def bin_median(data, binlen): + return np.median(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) +def bin_mean(data, binlen): + return np.mean(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) + # window_bigavg = 300 + # big_bin = [] + # for i in np.arange(0,len(avg_freq[0]),window_bigavg): # print('iiii?', i) + # collector = [] + # for f, a, col in zip(avg_freq, avg_amp, colorlist): + # for data in f[i//window_freq:(i+window_bigavg)//window_freq]: + # if data != 0 and not np.isnan(data): + # collector.append(data) + # print(collector) + # if len(collector) >100: + # big_bin.append(collector) + # for part in big_bin: + # print('i') + # plt.hist(part, bins = 250, range = (0,250)) + 
# plt.show() + + +def bin_ratio_std_mean(array, binlen): + #print( bin_array_std(array, binlen)/bin_array_mean(array,binlen) ) + mean, std, d2, d8 = bin_array_mean(array,binlen) + #print('mean, std, d2, d8', mean, std, d2, d8) + return mean * 2 > d8 > mean > d2 > mean * 0.5 + + +def bin_array_std(array, binlen): + bins = len(array)//binlen + stds = np.zeros((bins+1)) + #print(array[0: binlen]) + for i in range(len(stds)): + stds[i] = np.std(array[i*binlen: (i+1)*binlen]) + #print('stds0', stds[0], len(array)) + return stds + + +def bin_array_mean(array, binlen): + bins = len(array)//binlen +1 if len(array) % binlen != 0 else len(array)//binlen + means = np.zeros((bins)) + #print(array[0: binlen]) + stds = np.zeros((bins)) + d2 = np.zeros((bins)) + d8 = np.zeros((bins)) + for i in range(bins): + stds[i] = np.std(array[i*binlen: (i+1)*binlen]) + means[i] = np.median(array[i*binlen: (i+1)*binlen]) + d2[i] = np.percentile(array[i*binlen: (i+1)*binlen], 20) + d8[i] = np.percentile(array[i*binlen: (i+1)*binlen], 80) + + # means[i] = np.mean(array[i*binlen: (i+1)*binlen]) + #print('mean0',means[0], len(array)) + return means, stds, d2, d8 + + + + +def bin_ndarray(ndarray, new_shape, operation='sum'): + """ + Bins an ndarray in all axes based on the target shape, by summing or + averaging. + + Number of output dimensions must match number of input dimensions and + new axes must divide old ones. + + Example + ------- + >>> m = np.arange(0,100,1).reshape((10,10)) + >>> n = bin_ndarray(m, new_shape=(5,5), operation='sum') + >>> print(n) + + [[ 22 30 38 46 54] + [102 110 118 126 134] + [182 190 198 206 214] + [262 270 278 286 294] + [342 350 358 366 374]] + + """ + operation = operation.lower() + if not operation in ['sum', 'mean', 'std']: + raise ValueError("Operation not supported.") + if ndarray.ndim != len(new_shape): + raise ValueError("Shape mismatch: {} -> {}".format(ndarray.shape, + new_shape)) + compression_pairs = [(d, c//d) for d,c in zip(new_shape, + ndarray.shape)] + + #print(len(new_shape)) + flattened = [l for p in compression_pairs for l in p] + + ndarray = ndarray.reshape(len(flattened)) + for i in range(len(new_shape)): + op = getattr(ndarray, operation) + ndarray = op(-1*(i+1)) + return ndarray + + + + + + +def fill_hidden_3(fishes, eods, filename): + fishes = fishes + #print('hidden_calcing...') + nohidefishes = [] + for cl, fish in enumerate(fishes): + #print('Step1: Fish ', cl, ' ', cl, ' / ', len(fishes)) + #f = np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ "X.npmmp", dtype='float32', mode='w+', shape=(3,len(fish[0])*2), order = 'F') + f = np.zeros([3, len(fish[0])*2]) + fishisi = np.diff(fish[0]) + isi = fishisi[0] + lst_offst =0 + for i, newisi in enumerate(fishisi): + # print(cl, ' ..currently peak ', i, ' / ' , len(fishisi)) + newi = i+lst_offst + if newi > len(f[0])-1: # Errör + # print('Oh shit, nparray to small. 
doubling size') + f_new = np.empty([3,len(f[0])*2]) + f_new[:,:len(f[0])]=f + f = f_new + f[0][newi]=fish[0][i] + f[1][newi]=fish[1][i] + f[2][newi]=fish[2][i] + +# print(i, newi) + + + # print(cl, fish[0][i], isi, newisi) + if newisi > 2.8*isi: + guessx = fish[0][i] + isi + while guessx < fish[0][i] + newisi-0.8*isi: + peakx = peakaround3(guessx, isi*0.1, eods) + if peakx is not None: + newi = i+lst_offst + f[0][newi+1]=peakx + f[1][newi+1]=fish[1][i] + f[2][newi+1]=fish[2][i] + #print('estimated hidden peak: ', f[0][newi+1], f[2][newi+1]) + guessx = peakx + isi + (peakx-guessx) + lst_offst +=1 + #print('offset+1 at' ,i , peakx) + continue + break + isi = newisi + + + + nohidefishes.append(np.array([f[0,0:newi+1],f[1,0:newi+1],f[2,0:newi+1]])) + + + #print(x[0], x[200]) + return nohidefishes + + +def fill_hidden_Not(fishes, eods, filename): + fishes = fishes + #print('hidden_calcing...') + nohidefishes = [] + #for cl, fish in enumerate(fishes): + #print('Step1: Fish ', cl, ' ', cl, ' / ', len(fishes)) + #f = np.memmap(filename[prefixlen:-4]+"_Fish%d"%cl+ "X.npmmp", dtype='float32', mode='w+', shape=(3,len(fish[0])*2), order = 'F') + return nohidefishes + +def noisedelete_smoothing(array, binlen, method, thr1, thr2): + if len(array) <= 2: + if np.mean(array) > 140: + for a in array: + a = np.nan + return array, np.arange(0, len(array), 1) + temp_classisi = array + if len(array) > 11: + smoothed = savgol_filter(temp_classisi, 11, 1) + else: smoothed = savgol_filter(temp_classisi, 3, 1) + diff = np.square(smoothed-temp_classisi) + data = np.array(diff) + #plt.plot(diff, color = 'green') + result = np.median(data[:(data.size // binlen) * binlen].reshape(-1, binlen),axis=1) + result2 = bin_percentilediff(temp_classisi, binlen) + if method == 1: + result = result + elif method == 2: + result = result2 + if len(result) > 7: + smoothedresult = savgol_filter(result, 7, 1) + else: + smoothedresult = result + #plt.plot(np.arange(0,len(result)*binlen, binlen),result) + #plt.plot(smoothed) + #plt.plot(np.arange(0,len(result2)*20, 20), smoothedresult2) + #plt.plot(np.arange(0,len(result2)*20, 20), result2) + # plt.plot(temp_classisi, color = 'black') + # plt.plot(np.arange(0, len(result)*binlen, binlen),smoothedresult, 'red') + if method ==1 : + noiseindice = np.where(smoothedresult > thr1) + elif method == 2: + noiseindice = np.where(result > thr2)[0] + elif method == 3: + noiseindice = np.where(data > 1000) + print(noiseindice) + noiseindice = np.multiply(noiseindice, binlen) + print(noiseindice) + noiseindice = [x for i in noiseindice for x in range(i, i+binlen)] + print(np.diff(noiseindice)) + noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) + #print(noiseindice) + noiseindice = [x for arr in noiseindice if len(arr) > 1 for x in arr] + noiseindice= np.array(noiseindice) + #print(noiseindice) + array = np.array(array) + # Noise delete applial + if np.median(array) > 150: + noiseindice = np.arange(0, len(array), 1) + if len(noiseindice) > 0: + array[noiseindice] = np.nan + return array, noiseindice + +def noisedelete_lowpass(array,binlen): + origarray = array + if len(array) <= 5: + if np.mean(array) > 140 or np.mean(array) < 15: + for a in array: + a = np.nan + return array, [] #np.arange(0, len(array), 1) + array = np.array(array) + from scipy.signal import butter, lfilter + indice = [] + alldata = np.empty_like(array) + if len(array[np.isnan(array)]) > 0: + arrays = np.split(array, 
np.where(np.abs(np.diff(np.isnan(array))) == 1)[0]+1) + indice = np.where(np.abs(np.diff(np.isnan(array))) == 1)[0]+1 + indice = np.append(np.array([0]),indice) + else: + arrays = [array] + indice = [0] + for array,index in zip(arrays, indice): + if len(array) <2 or len(array[np.isnan(array)]) > 0: + alldata[index:index + len(array)] = array[:] + continue + print(array, 'array') + fs = 100 + cutoff = 25 + binlen = binlen + data = np.array(array, dtype = 'float64') + overlap = len(data)%binlen + if overlap > 0: + data = np.append(data, np.array([data[-1]]*(binlen-overlap)), axis = 0) + dataext = np.empty([data.shape[0]+20]) + dataext[:10]= data[0] + dataext[-10:] = data[-1] + dataext[10:-10]=data + B, A = butter(1, cutoff/ (fs / 2), btype = 'low') + #lpf_array = np.empty_like(dataext) + lpf_array= lfilter(B, A, dataext, axis = 0) + lpf_array = lfilter(B, A, lpf_array[::-1])[::-1] + lpf_binned_array = lpf_array[:(data.size // binlen) * binlen].reshape(-1, binlen) + lpf_array = lpf_array[10:-10] + if overlap > 0: + lpf_array[-(binlen-overlap):] = np.nan + data[-(binlen-overlap):] = np.nan + binned_array = data[:(data.size // binlen) * binlen].reshape(-1, binlen) + lpf_binned_array = lpf_array[:(data.size // binlen) * binlen].reshape(-1, binlen) + filterdiffs = np.empty([binned_array.shape[0]]) + #a = signal.firwin(1, cutoff = 0.3, window = "hamming") + for i, (bin_content, bin_filtered) in enumerate(zip(binned_array, lpf_binned_array)): + if i == binned_array.shape[0] - 1: + bin_content = bin_content[:-(binlen-overlap)] + bin_filtered = bin_filtered[:-(binlen-overlap)] + filterdiffs[i] = np.mean(np.square(np.subtract(bin_filtered[~np.isnan(bin_filtered)], bin_content[~np.isnan(bin_content)]))) + # filterdiff = filterdiff / len(bin_content) + print(filterdiffs) + binned_array[filterdiffs > 1, :] = np.nan + if overlap > 0: + data = binned_array.flatten()[:-(binlen-overlap)] + else: + data = binned_array.flatten() + print(data, 'data') + alldata[index:index + len(data)] = data + # twin[np.isnan(data)] = np.nan + # plt.plot(alldata, color = 'red') + # plt.plot(np.add(origarray, 2), color = 'blue') + # plt.ylim(0, 150) + # plt.show() + return alldata, [] + + # noiseindice = np.multiply(noiseindice, binlen) + # print(noiseindice) + # noiseindice = [x for i in noiseindice for x in range(i, i+binlen)] + # print(np.diff(noiseindice)) + # noiseindice = np.split(noiseindice, np.where((np.diff(noiseindice) != 1 ) & (np.diff(noiseindice) != 2) & (np.diff(noiseindice) != 3))[0]+1 ) + + # #print(noiseindice) + # noiseindice = [x for arr in noiseindice if len(arr) > 1 for x in arr] + # noiseindice= np.array(noiseindice) + # #print(noiseindice) + # array = np.array(array) + # # Noise delete applial + # if np.median(array) > 150: + # noiseindice = np.arange(0, len(array), 1) + # if len(noiseindice) > 0: + # array[noiseindice] = np.nan + # return array, noiseindice + + +def peakaround3(guessx, interval, eods): + pksinintv = eods[0][ ((guessx-interval < eods[0]) & (eods[0] < guessx+interval))] + if len(pksinintv)>0: + return(pksinintv[0]) + elif len(pksinintv) >1: + pksinintv = pksinintv[np.argmin(abs(pksinintv - guessx))] + return(pksinintv) ## might be bad, not tested + # for px in fish[0]: + # distold = interval + # if px < guessx-interval: + # continue + # # print('in area', guessx-interval) + # if guessx-interval < px < guessx+interval: + # found = True + # dist = px-guessx + # if abs(dist) < abs(distold): + # distold = dist + # if px > guessx+interval: + # + # if found == True: + # print(guessx, dist) + # 
time.sleep(5) + # return guessx + dist + # + # else: + # + # break + return None + + + +def fill_holes(fishes): #returns peakx, peaky, peakheight # Fills holes that seem to be missed peaks in peakarray with fake (X/Y/height)-Peaks + retur = [] + lost = [] + + #print('fill_holes fishes', fishes) + + for cl, fish in enumerate(fishes): + #print('Step2: Fish', cl) + fishisi = np.diff(fish[0]) + mark = np.zeros_like(fishisi) + isi = 0 + #print('mark', mark) + # print('fishisi' , fishisi) + #find zigzag: + c=0 + c0= 0 + n=0 + for i, newisi in enumerate(fishisi): + # print(newisi, isi) + if abs(newisi - isi)>0.15*isi: ## ZigZag-Detection : actually peaks of two classes in one class - leads to overlapping frequencys which shows in a zigzag pattern + if (newisi > isi) != (fishisi[i-1] > isi): + c+=1 + # print(abs(newisi - isi), 'x = ', fish[i].x) + c0+=1 + elif c > 0: + n += 1 + if n == 6: + if c > 6: + # print ('zigzag x = ', fish['x'][i-6-c0], fish['x'][i-6]) + mark[i-6-c0:i-6]= -5 + c = 0 + c0=0 + n = 0 + + #if c > 0: + # print(i, c) + # if c == 6: + # print('zigzag!') + isi = newisi + isi = 0 + for i, newisi in enumerate(fishisi): ## fill holes of up to 3 Peaks # Changed to: Only up to 1 Peak because : Holes might be intended for communicational reasons + #print('mark: ' , mark) + if mark[i] == -5: continue + if i+2 >= len(fishisi): + continue + if (2.2*isi > newisi > 1.8*isi) and (1.5*isi>fishisi[i+1] > 0.5*isi) : + mark[i] = 1 + isi = newisi + # print('found 1!' , i) + elif (2.2*isi > newisi > 1.8*isi) and (2.2*isi> fishisi[i+1] > 1.8*isi) and (1.5*isi > fishisi[i+2] > 0.5*isi): + mark[i] = 1 + isi = isi + #elif 3.4*isi > newisi > 2.6*isi and 1.5*isi > fishisi[i+1] > 0.5*isi: + # mark[i] = 2 + + elif (0.6* isi > newisi > 0): + # print('-1 found', i ) + if mark[i] ==0 and mark[i+1] ==0 and mark[i-1]==0 : + # isi newisi + # continue + # print('was not already set') + if fishisi[i-2] > isi < fishisi[i+1]: + mark[i] = -1 + # print('-1') + elif isi > fishisi[i+1] < fishisi[i+2]: + mark[i+1] = -1 + # print('-1') + isi = newisi + x = [] + y = [] + h = [] + x_lost=[] + y_lost=[] + h_lost=[] + # print('filledmarks: ', mark) + for i, m in enumerate(mark): + if m == -1 : + # print('-1 at x = ', fish['x'][i]) + continue + if m == -5: + x_lost.append(fish[0][i]) + y_lost.append(fish[1][i]) + h_lost.append(fish[2][i]) + x.append(fish[0][i]) + y.append(fish[1][i]) + h.append(fish[2][i]) + continue + x.append(fish[0][i]) + y.append(fish[1][i]) + h.append(fish[2][i]) + if m == 1: + # print('hofly added peak at x = ' , fish['x'][i]) + x.append(fish[0][i] + fishisi[i-1]) + y.append( 0.5*(fish[1][i]+fish[1][i+1])) + h.append(0.5*(fish[2][i]+fish[2][i+1])) + elif m== 2: + x.append(fish[0][i] + fishisi[i]) + y.append( 0.5*(fish[1][i]+fish[1][i+1])) + h.append(0.5*(fish[2][i]+fish[2][i+2])) + x.append(fish[0][i] + 2*fishisi[i-1]) + y.append( 0.5*(fish[1][i]+fish[1][i+2])) + h.append(0.5*(fish[2][i]+fish[2][i+2])) + # print('added at x = ', fish[0][i] + fishisi[i]) + x = np.array(x) + y= np.array(y) + h = np.array(h) + x_lost = np.array(x_lost) + y_lost = np.array(y_lost) + h_lost = np.array(h_lost) + #print('retur', x, y, h) + retur.append([x,y,h]) + lost.append([x_lost,y_lost,h_lost]) + # filledpeaks =np.array(filledpeaks) + # print(filledpeaks.shape) + # filledpeaks. 
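+    # Marker codes used above (as implemented in this function):
+    #   -5  zigzag region, i.e. peaks of two overlapping classes mixed into one class;
+    #       these peaks are kept in the returned class and additionally copied to 'lost',
+    #   -1  suspected spurious extra peak, dropped from the returned class,
+    #    1  a single missed peak: one interpolated (x, y, height) point is inserted,
+    #    2  two missed peaks: two interpolated points would be inserted, but this marker
+    #       is currently never set since the detection branch for it is commented out.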
+ return retur, lost + + +# eods[-len(thisblock_eods[:,]):] = thisblock_eods +# eods = np.memmap("eods_"+filename[:-3]+"npy", dtype='float32', mode='r+', shape=(4,eods_len)) + #fp = np.memmap(filepath[:-len(filename)]+"eods_"+filename[:-3]+"npy", dtype='float32', mode='r+', shape=(4,len(thisblock_eods[:,]))) + #nix print( b.data_arrays) + # for cl in np.unique(cllist): + # currentfish_x = x[:][cllist == cl] + # currentfish_y = y[:][cllist == cl] + # currentfish_h d= x[:][cllist == cl] + #nix try: + #nix xpositions[cl] = b.create_data_array("f%d_eods" %cl, "spiketimes", data = currentfish_x) + #nix xpositions[cl].append_set_dimension() + #nix # thisfish_eods = b.create_multi_tag("f%d_eods_x"%cl, "eods.position", xpositions[cl]) + #nix # thisfish_eods.references.append(nixdata) + #nix except nix.pycore.exceptions.exceptions.DuplicateName: + #nix + #nix xpositions[cl].append(currentfish_x) + + + #thisfish_eods.create_feature(y, nix.LinkType.Indexed) + #b.create_multi_tag("f%d_eods_y"%cl, "eods.y", positions = y) + #b.create_multi_tag("f%d_eods_h"%cl, "eods.amplitude", positions = h) + #thisfish_eods.create_feature + + #nix file.close() + # Save Data + # Needed: + # Meta: Starttime, Startdate, Length + # x, y, h, cl, difftonextinclass -> freq ? , + + # Later: Find "Nofish" + # Find "Twofish" + # Find "BadData" + # Find "Freqpeak" + # ? Find "Amppeak" + # + + # bigblock = np.array(bigblock) + # x=xarray(bigblock) + # y=yarray(bigblock) + # cl=clarray(bigblock) + + + #nix file = nix.File.open(file_name, nix.FileMode.ReadWrite) + #nix b = file.blocks[0] + #nix nixdata = b.data_arrays[0] + #nix cldata = [] + #nix print(classes) + #nix print(b.data_arrays) + #nix for i in range(len(np.unique(classes))): + #nix cldata.append(b.data_arrays[i+1]) + + + # for cl in + + # for cl in + # x = thisfish_eods + + + #nix file.close() + + + +def reduce_classes(npFishes): + offtimeclasses = [] + for i, fish in enumerate(npFishes): + fish = np.array(fish) + #print(fish[0]) + # print('nüFishes before and after command') + # print('bef', npFishes[i][0][0]) + # print(fish[:,:][:,np.where(~np.isnan(fish[0]))].reshape(4,-1)) + npFishes[i] = fish[:,:][:,np.where(~np.isnan(fish[0]))][:,0] + # print('after', npFishes[i][0][0]) + if len(npFishes[i][0]) <= 100: + offtimeclasses.append(i) + #print('delete class ', i) + #print('Len offtime vs len Fishes', len(offtimeclasses), len(npFishes)) + for index in sorted(offtimeclasses, reverse=True): + del npFishes[index] + #print('npFishes to check features', npFishes[0][3]) + srt_beg = sort_beginning(npFishes) + # print(len(npFishes[0])) + # print(len(srt_beg)) + #srt_end = sort_ending(npFishes) + if len(srt_beg) >= 1: + reduced = [] + reduced.append(srt_beg[0]) + #for i, fish in enumerate(srt_beg): + #print(len(srt_beg)) + #print('reducing classes') + for i in range(1, len(srt_beg)): + #print('.', end = '') + cl = 0 + reducedlen_beg = len(reduced) + while cl < reducedlen_beg: + cond1 = reduced[cl][0][-1] < srt_beg[i][0][0] + cond2 = False + nxt=i+1 + while nxt < len(srt_beg) and srt_beg[i][0][-1] > srt_beg[nxt][0][0]: #part ends after another part started (possibly in the other part. 
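+                    # Reading of this overlap check: srt_beg is sorted by start time, so the
+                    # surrounding while visits every later-starting class that begins before
+                    # class i ends. As soon as one of those overlapping classes is longer,
+                    # class i is appended to 'reduced' as its own entry; otherwise (cond2)
+                    # class i may be concatenated onto the first reduced class that already
+                    # ended before class i starts (cond1).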
+ if len(srt_beg[nxt][0]) > len(srt_beg[i][0]):# -> lencheck to pick longer part) + reduced.append(srt_beg[i]) + # print('case1') + break + nxt+=1 + else: + cond2 = True + # print('lenreduced', len(reduced), len(srt_beg)) + #print(i, cl, cond1, cond2 ) + if cond1 and cond2: + #print(reduced[cl].shape, srt_beg[i].shape) + reduced[cl] = np.concatenate((reduced[cl],srt_beg[i]), axis=1) + #print(len(reduced[cl][0]), len(srt_beg[i][0])) + cl+=1 + break + if cond2 == False: + break + cl+=1 + else: + reduced.append(srt_beg[i]) + + #print('len red', len(reduced)) + #print(len(npFishes[0])) + return reduced + else: + return [] + +def sort_beginning(npFishes): + srted = npFishes + srted.sort(key=lambda x: x[0][0]) + #for i in srted[0][0]: + # print(i) + + return srted + +def sort_ending(npFishes): + srted = npFishes[:] + srted.sort(key=lambda x: x[0][-1]) + return srted + +def noisedscrd(fishes): + for fish in fishes: + print(np.std(fish[2])) + + +def plot_ontimes(ontime): + plt.fill_between(range(len(ontime[0])), ontime[0], color = '#1e2c3c', label = 'close') #'#324A64' + plt.fill_between(range(len(ontime[1])), ontime[1], color = '#324A64', label = 'nearby') + plt.fill_between(range(len(ontime[2])), ontime[2], color = '#8ea0b4', label = 'far') + plt.xlabel('seconds') + plt.ylabel('position') + plt.legend(loc = 1) + plt.ylim(0,1.5) + # plt.xlim(0,len()) + plt.show() + + + +if __name__ == '__main__': + main() diff --git a/thunderfish/DextersThunderfishAddition/let_them_rum_allfiles_DexThunder.sh b/thunderfish/DextersThunderfishAddition/let_them_rum_allfiles_DexThunder.sh new file mode 100644 index 00000000..c04b68a9 --- /dev/null +++ b/thunderfish/DextersThunderfishAddition/let_them_rum_allfiles_DexThunder.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +cat allfiles.txt | while read -r line + do + python3 -W ignore DextersThunderfishAddition.py "${line:0:-4}/$line" + done diff --git a/thunderfish/DextersThunderfishAddition/run_example.sh b/thunderfish/DextersThunderfishAddition/run_example.sh new file mode 100644 index 00000000..14e372a9 --- /dev/null +++ b/thunderfish/DextersThunderfishAddition/run_example.sh @@ -0,0 +1,5 @@ +python3 -W ignore DextersThunderfishAddition.py ~/40320L01_G11/40320L01_G11.WAV 0 1 1 3700 4000 + +#python3 analyseDexRefactorShort.py ~/70319L01_F1/70319L01_F1.WAV 0 1 1 #3700 4000 +#python3 analyseDexRefactorShort.py ~/40320L01_G11/40320L01_G11.WAV 0 1 1 #3700 4000 +#python3 analyseDexRefactor.py ~/40320L01_G11/40320L01_G11.WAV 0 1 1 0 100
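A minimal sketch (not part of the patch) of how the files written by DextersThunderfishAddition.py
with save == 1 could be reloaded for a quick look. It assumes a recording called 40320L01_G11, so
that classes/40320L01_G11_classes_red.lst, 40320L01_G11_freqs2_lp.npy and 40320L01_G11_amps2_lp.npy
exist in the working directory; adjust the name to your own recording.

    import pickle
    import numpy as np
    import matplotlib.pyplot as plt

    name = "40320L01_G11"  # assumed recording name

    # reduced classes: one array per putative fish with rows x, y, height, isi
    with open("classes/" + name + "_classes_red.lst", "rb") as fp:
        fishes = pickle.load(fp)
    print("number of reduced classes:", len(fishes))

    # per-second frequency and amplitude traces, one row per reduced class
    freqs = np.load(name + "_freqs2_lp.npy")
    amps = np.load(name + "_amps2_lp.npy")  # can be plotted the same way as freqs

    for f in freqs:
        plt.plot(f)
    plt.xlabel("time [s]")
    plt.ylabel("EOD frequency [Hz]")
    plt.show()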