forked from snikolov/rumor
/
chap3.tex
162 lines (149 loc) · 7.28 KB
/
chap3.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
\chapter{Algorithm}
\label{ch:alg}
%TODO: Graphics of shift invariant distance, closest match, etc.
%TODO. chapter references
In this chapter, I describe the practical implementation of the method described
in Chapter \ref{ch:method}.
% Assume we have the normalize and sliced signals already and just talk about
% detection.
\section{Overview}
The goal of the algorithm is to perform online classification of an infinite
stream of samples from an observed digital signal. We will focus on the case of
binary classification, in which we have positive signals and negative signals,
but the results can be extended to multiple classes. For binary classification,
one could imagine that one class represents events and the other non-events, and
that we would like to detect events as soon as they happen.
To predict which class the observed signal belongs to at a given point in time,
we compute the probability that the recent samples of the observed signal were
generated by a latent source from the positive class and the probability that
they were generated by a latent source from the negative class, based on
previously observed {\em reference signals} for each class. Recall from Chapter
\ref{ch:method} that a signal is generated by a latent source of a particular
class if it shares a latent source with some reference signal from that class.
To compute the probability that the recently observed samples share the same
latent source with a particular reference signal, we compute the distance
between the trajectory consisting of the recently observed samples and all
trajectories of the same size in the reference signal, and take the minimum over
all such trajectories.
\section{Implementation}
In practice, the computation of conditional class probabilities amounts to
nothing more than computing distances. To compute the probability that an
observation belongs to a particular class, one simply computes the distance from
the observation to each reference signal in that class in order to see how much
the observation resembles the reference signals for that class.
Algorithm \ref{alg:Detect} contains the core detection logic.
% TODO: Justification for shift-invariance.
\begin{algorithm}
\caption{Perform online binary classification on the infinite stream $\mb
s_{\infty}$ using sets of positive and negative reference signals $R_+$ and
$R_-$.}
\label{alg:Detect}
\at{Detect}($\mb s_{\infty}$, $\mathcal{R}_+$, $\mathcal{R}_-$, $\gamma$, $\theta$,
$D_{req}$):
\begin{algorithmic}[1]
\STATE \vt{ConsecutiveDetections} $\leftarrow$ 0
\LOOP
\STATE $\mb s$ $\leftarrow$ \at{UpdateObservation}($\mb s_{\infty}$, $N_{obs}$)
\FOR{$\mb r$ {\bf in} $\mathcal{R}_+$}
\STATE \vt{PosDists}.\at{Append}(\at{DistToReference}($\mb s$, $\mb r$))
\ENDFOR
\FOR{$\mb r$ {\bf in} $\mathcal{R}_-$}
\STATE \vt{NegDists}.\at{Append}(\at{DistToReference}($\mb s$, $\mb r$))
\ENDFOR
\STATE \vt{R} = \at{ProbClass}(\vt{PosDists}, $\gamma$) / \at{ProbClass}(\vt{NegDists}, $\gamma$)
\IF{\vt{R} $> \theta$ }
\IF{\vt{ConsecutiveDetections} $>$ $D_{req}$}
\STATE \vt{DetectionTime} $\leftarrow$ \at{CurrentTime}()
\RETURN \vt{DetectionTime}
\ELSE
\STATE \vt{ConsecutiveDetections} $\leftarrow$ \vt{ConsecutiveDetections} + 1
\ENDIF
\ELSE
\STATE \vt{ConsecutiveDetections} $\leftarrow$ 0
\ENDIF
\ENDLOOP
\end{algorithmic}
\end{algorithm}
At each time step, it updates the observation $\mb
s$ so that $\mb s$ contains the latest $N_{obs}$ samples from the infinite
stream $\mb s_{\infty}$ and computes the distances \vt{PosDists}
(resp. \vt{NegDists}) to reference signals of the positive class (resp. negative
class). A detection is declared whenever the ratio of class probabilities
$R(\mb s)$ exceeds the threshold $\theta$ for $D_{req}$ consecutive time steps.
Algorithm \ref{alg:DistToReference} computes the distance between a reference
signal $\mb r$ and an observation $\mb s$.
\begin{algorithm}
\caption{Compute the minimum distance between $\mb s$ and all pieces of $\mb r$
of the same length as $\mb s$.}
\label{alg:DistToReference}
\at{DistToReference}($\mb s$, $\mb r$):
\begin{algorithmic}[1]
\STATE $N_{obs}$ $\leftarrow$ \at{length}($\mb s$)
\STATE $N_{ref}$ $\leftarrow$ \at{length}($\mb r$)
\STATE \vt{MinDist} = $\infty$
\FOR{$i=1$ \TO $N_{ref} - N_{obs} + 1$}
\STATE \vt{MinDist} = \at{Min}(\vt{MinDist}, \at{Dist}($\mb r_{i:i+N_{obs}-1}$, $\mb s$))
\ENDFOR
\RETURN \vt{MinDist}
\end{algorithmic}
\end{algorithm}
Since the reference signal is generally longer than the observation, we compute
the minimum distance (Algorithm \ref{alg:DistToSignal}) across all pieces of
$\mb r$ of the same size as $\mb s$.
Algorithm \ref{alg:DistToSignal} simply computes the Euclidean distance between
two signals of the same size.
\begin{algorithm}
\caption{Compute the distance between two signals $\mb s$ and $\mb t$ of the same
length}
\label{alg:DistToSignal}
\at{Dist}($\mb s$, $\mb t$):
\begin{algorithmic}[1]
\STATE \vt{D} $\leftarrow$ 0
\FOR{$i=1$ to \at{length}($\mb s$)}
\STATE \vt{D} $\leftarrow$ \vt{D} + $(s_i - t_i)^2$
\ENDFOR
\RETURN \vt{D}
\end{algorithmic}
\end{algorithm}
Using the distances from an observation to the reference signals of a class, we
compute a number proportional the probability that the observation belongs to
the class (Algorithm \ref{alg:ProbClass}).
\begin{algorithm}
\caption{Using the distances of an observation to the reference signals of a
certain class, compute a number proportional to the probability that the
observation belongs to that class.}
\label{alg:ProbClass}
\at{ProbClass}(\vt{Dists}, $\gamma$):
\begin{algorithmic}[1]
\STATE $P \leftarrow 0$
\FOR{$i=1$ to \at{Length}(\vt{Dists})}
\STATE $P \leftarrow P + \exp\left(-\gamma Dists_i\right)$
\ENDFOR
\RETURN $P$
\end{algorithmic}
\end{algorithm}
\clearpage
\section{Performance and Scalability}
% TODO
% Summarize performance
To do detection on an infinite stream for $T$ time steps, with $|\mathcal{R}_+|$
positive reference signals and $|\mathcal{R}_-|$ negative reference signals of
length $N_{ref}$, and observations of length $N_{obs}$, our rudimentary
implementation runs in $\mathcal{O}(TN_{ref}(|\mathcal{R}_-| +
|\mathcal{R}_-|))$ time. In practice, the algorithm can be made faster by a
constant factor by not performing detection on every time step, not computing
distances based on the full $N_{obs}$ samples, or not comparing the observation
to every single slice of the reference signal.
Clearly, the computational cost of our implementation grows with the amount of
data. Nevertheless, our approach is scalable, since one can compute in parallel
the scores for each of the topics, as well as each of the reference signal
distances for each topic.
% Potential to be Super fast using trajectory indexing and retrieval
A more sophisticated version of our algorithm would use an approach based on
time series indexing. For instance, Rakthanmanon et al. have shown a way to
efficiently search over trillions of time series subsequences
\cite{Rakthanmanon}. Since our probability-based metric involves exponential
decay based on the distance between signals, most reference signals that are far
away from the observation can safely be ignored. Thus, instead of computing the
distance to all reference signals, which could become costly, we can operate on
only a very small fraction of them without significantly affecting the outcome.