Merge pull request #669 from aodn/spike_development

SpikeQC Burst Spike functionality, Preview, and small fixes
aodn · Jul 15, 2020 · 9a4a551 · 9a4a551
2 parents dd1b85c + 3388992
commit 9a4a551
Show file tree

Hide file tree

Showing 24 changed files with 2,446 additions and 429 deletions.
diff --git a/AutomaticQC/SpikeClassifiers/imosSpikeClassifierBurstHampel.m b/AutomaticQC/SpikeClassifiers/imosSpikeClassifierBurstHampel.m
@@ -0,0 +1,176 @@
+function [spikes] = imosSpikeClassifierBurstHampel(bindexes, signal, use_burst_window, half_window_width, madfactor, lower_mad_limit, repeated_only)
+% function [spikes] = imosSpikeClassifierBurstHampel(bindexes, signal,  use_burst_window,  half_window_width, madfactor, lower_mad_limit, repeated_only)
+%
+% A Hampel classifier for burst data. The half_window_width is applied at burst level instead of
+% sample level.
+%
+% Inputs:
+%
+% bindexes - a cell with the invidivudal burst range indexes.
+% signal - A Burst 1-d signal.
+% use_burst_window  - a boolean to consider the half_window_width applied at burst scale. If false, will consider the burst series as continuous.
+% half_window_width - The half_window_width burst range (the full window size will be 1+2*half_window_width)
+% madfactor -  A multipling scale for the MAD.
+% repeated_only - a boolean to mark spikes in burst only if they are detected more than one time (only for half_window_width>0).
+% lower_mad_limit - a lower threshold for the MAD values, which values below will be ignored.
+%
+% Outputs:
+%
+% spikes - An array with spikes indexes.
+% fsignal - A filtered signal where the spikes are substituted by median values of the window.
+%
+% Example:
+%
+% % simple spikes
+% x = randn(1,100)*1e-2;
+% spikes = [3,7,33,92,99]
+% x(spikes) = 1000;
+% bduration = 6;
+% v = 1:bduration:length(x)+bduration;
+% for k=1:length(v)-1;
+% bind{k} = [v(k),min(v(k+1)-1,length(x))];
+% end
+% [dspikes] = imosSpikeClassifierBurstHampel(bind,x);
+% assert(isequal(dspikes,spikes));
+% % equal to Hampel
+% [dspikes] = imosSpikeClassifierBurstHampel(bind,x,length(bind));
+% [dspikes2] = imosSpikeClassifierHampel(x,length(bind));
+% assert(isequal(dspikes,spikes));
+% assert(isequal(dspikes,dspikes2));
+%
+% % detecting entire burst as spike
+% x = randn(1,100)*1e-2;
+% fullburst_spiked = [3,7,13,14,15,16,17,18,33,92,99]
+% x(fullburst_spiked) = 1000;
+% [dspikes] = imosSpikeClassifierBurstHampel(bind,x);
+% assert(isequal(dspikes,[3,7,33,92,99]));
+% [dspikes2] = imosSpikeClassifierBurstHampel(bind,x,true,2);
+% assert(isequal(dspikes2,fullburst_spiked))
+%
+%
+% author: hugo.oliveira@utas.edu.au
+%
+
+% Copyright (C) 2020, Australian Ocean Data Network (AODN) and Integrated
+% Marine Observing System (IMOS).
+%
+% This program is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation version 3 of the License.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program.
+% If not, see <https://www.gnu.org/licenses/gpl-3.0.en.html>.
+%
+
+narginchk(2, 7)
+
+if nargin == 2
+    use_burst_window = 0;
+    half_window_width = 1;
+    madfactor = 5;
+    lower_mad_limit = 0;
+    repeated_only = false;
+elseif nargin == 3
+    half_window_width = 1;
+    madfactor = 5;
+    lower_mad_limit = 0;
+    repeated_only = false;
+elseif nargin == 4
+    madfactor = 5;
+    lower_mad_limit = 0;
+    repeated_only = false;
+elseif nargin == 5
+    lower_mad_limit = 0;
+    repeated_only = false;
+elseif nargin == 6
+    repeated_only = false;
+end
+
+if ~use_burst_window
+    if half_window_width == 0
+        half_window_width = 1;
+    end
+    spikes = imosSpikeClassifierHampel(signal,half_window_width,madfactor,lower_mad_limit);
+    return
+end
+
+nbursts = double(half_window_width) .* 2 + 1;
+burst_is_series = nbursts >= length(bindexes);
+if burst_is_series
+    spikes = imosSpikeClassifierHampel(signal, ceil(length(signal)/2+1), madfactor,lower_mad_limit);
+    return
+end
+
+spikes = NaN(size(signal));
+%left bry
+bs = 1;
+range = 1:bindexes{nbursts}(end);
+[bspikes] = imosSpikeClassifierHampel(signal(range), length(range), madfactor,lower_mad_limit);
+
+if ~isempty(bspikes)
+    blen = length(bspikes);
+    spikes(bs:bs + blen - 1) = bspikes;
+    bs = bs + blen;
+end
+
+h=[];
+series_end_at_next_window = nbursts+1 >= length(bindexes)-nbursts;
+if series_end_at_next_window
+    bie = bindexes{nbursts+1}(1);
+else
+    sburst = 1;
+    ni = nbursts+1;
+    ne = length(bindexes)-nbursts;
+    h=waitbar(0,'Computing Spikes');
+    wbval=0;
+    wbint=0.05;
+    for eburst = ni:ne
+        if eburst/ne > (wbval+wbint)
+            wbval = wbval+wbint;
+            waitbar(eburst/ne,h,'Computing Spikes');
+        end
+        sburst = sburst + 1;
+        bis = bindexes{sburst}(1);
+        bie = bindexes{eburst}(end);
+        csignal = signal(bis:bie);
+        [bspikes] = imosSpikeClassifierHampel(csignal, length(csignal), madfactor,lower_mad_limit);
+
+        if ~isempty(bspikes)
+            blen = length(bspikes);
+            spikes(bs:bs + blen - 1) = bspikes + bis - 1;
+            bs = bs + blen;
+        end
+
+    end
+end
+if ~isempty(h)
+    h.delete()
+end
+%right bry
+range = bie + 1:bindexes{end}(end);
+[bspikes] = imosSpikeClassifierHampel(signal(range), length(range), madfactor,lower_mad_limit);
+
+if ~isempty(bspikes)
+    blen = length(bspikes);
+    spikes(bs:bs + blen - 1) = bspikes + bie;
+end
+
+spikes = spikes(~isnan(spikes));
+if repeated_only
+    srange = min(spikes)-1:max(spikes)+1;
+    candidates = find(histcounts(spikes,srange)>1);
+    if ~isempty(candidates)
+        spikes = candidates;
+    end
+else
+    spikes = unique(spikes);
+end
+
+
+end
diff --git a/AutomaticQC/SpikeClassifiers/imosSpikeClassifierBurstRunningStats.m b/AutomaticQC/SpikeClassifiers/imosSpikeClassifierBurstRunningStats.m
@@ -0,0 +1,83 @@
+function [spikes] = imosSpikeClassifierBurstRunningStats(bindexes, signal, scalefun, dispersionfun, dispersion_factor)
+% function [spikes] = imosSpikeClassifierBurstRunningStats(bindexes, signal, scalefun, dispersionfun, dispersion_factor)
+%
+% A simple thresholding method based on user defined deviations around a central scale. Values above
+% the dispersion will be tagged.
+%
+% Inputs:
+%
+% bindexes - a cell with the invidivudal burst range indexes.
+% signal - A Burst 1-d signal.
+% scalefun - the function to estimate the centre of the distribution. (e.g. mean or median)
+%          Default: @mean
+% dispersionfun - the function to estimate the dispersion (e.g. std, mean_absolute_deviation, median_absolute_deviation)
+%               Default: @std
+% dispersion_factor - the multiplying factor for the dispersion.
+%                   Default: 2
+%
+% Outputs:
+%
+% spikes - An array with spikes indexes.
+% cscale - the computed scale
+% cdispersion - the computed dispersion (scaled).
+%
+% Example:
+% z = randn(1,10);
+% z(10) = 10000;
+% [spikes] = imosSpikeClassifierRunningStats(z,@mean,@std,2);
+% assert(spikes(10)==1)
+% assert(spikes(1:9)==0)
+%
+% author: hugo.oliveira@utas.edu.au
+%
+
+% Copyright (C) 2020, Australian Ocean Data Network (AODN) and Integrated
+% Marine Observing System (IMOS).
+%
+% This program is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation version 3 of the License.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program.
+% If not, see <https://www.gnu.org/licenses/gpl-3.0.en.html>.
+%
+narginchk(2,5)
+if nargin==2
+	scalefun = @nanmean;
+	dispersionfun = @nanstd;
+	dispersion_factor = 2;
+elseif nargin == 3
+	dispersionfun = @nanstd;
+	dispersion_factor = 2;
+elseif nargin == 4
+	dispersion_factor = 2;
+end
+
+spikes = NaN(size(signal));
+ne = length(bindexes);
+
+h=waitbar(0,'Computing Spikes');
+wbval=0;
+wbint=0.05;
+for k=1:ne
+	if k/ne > (wbval+wbint)
+		wbval=wbval+wbint;
+		waitbar(k/ne,h,'Computing Spikes');
+	end
+	brange = bindexes{k};
+	xs = brange(1);
+	xe = brange(2);
+	bspikes = imosSpikeClassifierRunningStats(signal(xs:xe),scalefun,dispersionfun,dispersion_factor);
+	if ~isempty(bspikes)
+		spikes(xs:xs+length(bspikes)-1) = bspikes+xs-1;
+	end
+end
+h.delete();
+spikes = unique(spikes(~isnan(spikes)));
+end
diff --git a/AutomaticQC/SpikeClassifiers/imosSpikeClassifierHampel.m b/AutomaticQC/SpikeClassifiers/imosSpikeClassifierHampel.m
@@ -1,30 +1,25 @@
-function [spikes, fsignal] = imosSpikeClassifierHampel(signal, window, stdfactor)
-% function spikes, fsignal = imosSpikeClassifierHampel(signal, window, stdfactor)
+function [spikes, fsignal] = imosSpikeClassifierHampel(signal, half_window_width, madfactor, lower_mad_limit)
+% function spikes, fsignal = imosSpikeClassifierHampel(signal, half_window_width, madfactor, lower_mad_limit)
 %
-% A wrapper to Hampel, a median windowed filter, for both
-% Burst and Equally spaced time series.
-%
-% The hampel method is a robust parametric denoising filter that uses
-% the MAD deviation (scaled) from the median in a pre-defined
-% indexed centred window.
+% A wrapper to Hampel classifier.The hampel method is a robust parametric denoising filter that uses
+% the MAD deviation (scaled) from the median in a pre-defined centred window.
 %
 % See also hampel.m.
 %
 % Inputs:
 %
 % signal - A 1-d signal.
-% window - A window index range. If signal is in burst mode, this is ignored.
-% stdfactor - A multiplying scale for the standard deviation.
+% half_window_width - A half_window_width index range (the full window size will be 1+2*half_window_width)
+% madfactor - A multiplying scale for the standard deviation.
+% lower_mad_limit - a lower threshold for the MAD values, which values below will be ignored.
 %
 % Outputs:
 %
 % spikes - An array with spikes indexes.
-% fsignal - A filtered signal where the spikes are subsituted
-%           by the median of the window.
+% fsignal - A filtered signal where the spikes are substituted by median values of the window.
 %
 % Example:
 %
-%
 % author: hugo.oliveira@utas.edu.au
 %
 
@@ -44,14 +39,20 @@
 % along with this program.
 % If not, see <https://www.gnu.org/licenses/gpl-3.0.en.html>.
 %
+narginchk(1, 4)
 
-if nargin < 3
-    window = 3;
-    stdfactor = 10;
-elseif nargin < 2
-    stdfactor = 10;
+if nargin == 1
+    half_window_width = 3;
+    madfactor = 10;
+    lower_mad_limit = 0.0;
+elseif nargin == 2
+    madfactor = 10;
+    lower_mad_limit = 0.0;
+elseif nargin == 3
+    lower_mad_limit = 0.0;
 end
 
-[fsignal, bind, ~, ~] = hampel(signal, window, stdfactor);
-spikes = find(bind);
+[fsignal, bind, ~, smad] = hampel(signal, half_window_width, madfactor);
+above_limit = smad > lower_mad_limit;
+spikes = find(bind .* above_limit);
 end