Implement a method to get nonsilence frames

alphacep · May 31, 2020 · b087e2b · b087e2b
1 parent 6fa1949
commit b087e2b
Show file tree

Hide file tree

Showing 2 changed files with 61 additions and 10 deletions.
diff --git a/src/online2/online-ivector-feature.cc b/src/online2/online-ivector-feature.cc
@@ -480,7 +480,8 @@ OnlineSilenceWeighting::OnlineSilenceWeighting(
 
 template <typename FST>
 void OnlineSilenceWeighting::ComputeCurrentTraceback(
-    const LatticeFasterOnlineDecoderTpl<FST> &decoder) {
+    const LatticeFasterOnlineDecoderTpl<FST> &decoder,
+    bool use_final_probs) {
   int32 num_frames_decoded = decoder.NumFramesDecoded(),
       num_frames_prev = frame_info_.size();
   // note, num_frames_prev is not the number of frames previously decoded,
@@ -495,7 +496,6 @@ void OnlineSilenceWeighting::ComputeCurrentTraceback(
   if (num_frames_decoded == 0)
     return;
   int32 frame = num_frames_decoded - 1;
-  bool use_final_probs = false;
   typename LatticeFasterOnlineDecoderTpl<FST>::BestPathIterator iter =
       decoder.BestPathEnd(use_final_probs, NULL);
   while (frame >= 0) {
@@ -530,7 +530,8 @@ void OnlineSilenceWeighting::ComputeCurrentTraceback(
 
 template <typename FST>
 void OnlineSilenceWeighting::ComputeCurrentTraceback(
-    const LatticeIncrementalOnlineDecoderTpl<FST> &decoder) {
+    const LatticeIncrementalOnlineDecoderTpl<FST> &decoder,
+    bool use_final_probs) {
   int32 num_frames_decoded = decoder.NumFramesDecoded(),
       num_frames_prev = frame_info_.size();
   // note, num_frames_prev is not the number of frames previously decoded,
@@ -545,7 +546,6 @@ void OnlineSilenceWeighting::ComputeCurrentTraceback(
   if (num_frames_decoded == 0)
     return;
   int32 frame = num_frames_decoded - 1;
-  bool use_final_probs = false;
   typename LatticeIncrementalOnlineDecoderTpl<FST>::BestPathIterator iter =
       decoder.BestPathEnd(use_final_probs, NULL);
   while (frame >= 0) {
@@ -582,16 +582,20 @@ void OnlineSilenceWeighting::ComputeCurrentTraceback(
 // Instantiate the template OnlineSilenceWeighting::ComputeCurrentTraceback().
 template
 void OnlineSilenceWeighting::ComputeCurrentTraceback<fst::Fst<fst::StdArc> >(
-    const LatticeFasterOnlineDecoderTpl<fst::Fst<fst::StdArc> > &decoder);
+    const LatticeFasterOnlineDecoderTpl<fst::Fst<fst::StdArc> > &decoder,
+    bool use_final_probs);
 template
 void OnlineSilenceWeighting::ComputeCurrentTraceback<fst::GrammarFst>(
-    const LatticeFasterOnlineDecoderTpl<fst::GrammarFst> &decoder);
+    const LatticeFasterOnlineDecoderTpl<fst::GrammarFst> &decoder,
+    bool use_final_probs);
 template
 void OnlineSilenceWeighting::ComputeCurrentTraceback<fst::Fst<fst::StdArc> >(
-    const LatticeIncrementalOnlineDecoderTpl<fst::Fst<fst::StdArc> > &decoder);
+    const LatticeIncrementalOnlineDecoderTpl<fst::Fst<fst::StdArc> > &decoder,
+    bool use_final_probs);
 template
 void OnlineSilenceWeighting::ComputeCurrentTraceback<fst::GrammarFst>(
-    const LatticeIncrementalOnlineDecoderTpl<fst::GrammarFst> &decoder);
+    const LatticeIncrementalOnlineDecoderTpl<fst::GrammarFst> &decoder,
+    bool use_final_probs);
 
 
 void OnlineSilenceWeighting::GetDeltaWeights(
@@ -700,4 +704,42 @@ void OnlineSilenceWeighting::GetDeltaWeights(
   }
 }
 
+void OnlineSilenceWeighting::GetNonsilenceFrames(
+    int32 num_frames_ready, int32 first_decoder_frame,
+    std::vector<int32> *frames) {
+  // num_frames_ready is at the feature frame-rate, most of the code
+  // in this function is at the decoder frame-rate.
+  // round up, so we are sure to get weights for at least the frame
+  // 'num_frames_ready - 1', and maybe one or two frames afterward.
+  KALDI_ASSERT(num_frames_ready > first_decoder_frame || num_frames_ready == 0);
+  int32 fs = frame_subsampling_factor_,
+  num_decoder_frames_ready = (num_frames_ready - first_decoder_frame + fs - 1) / fs;
+
+  frames->clear();
+
+  int32 prev_num_frames_processed = frame_info_.size();
+  if (frame_info_.size() < static_cast<size_t>(num_decoder_frames_ready))
+    frame_info_.resize(num_decoder_frames_ready);
+
+  // Don't go further backward into the past then 500 frames before the most
+  // recent frame
+  int32 begin_frame = std::max<int32>(0, prev_num_frames_processed - 500),
+      frames_out = static_cast<int32>(frame_info_.size()) - begin_frame;
+  // frames_out is the number of frames we will output.
+  KALDI_ASSERT(frames_out >= 0);
+
+  for (int32 offset = 0; offset < frames_out; offset++) {
+    int32 frame = begin_frame + offset;
+    int32 transition_id = frame_info_[frame].transition_id;
+    if (transition_id != -1) {
+      int32 phone = trans_model_.TransitionIdToPhone(transition_id);
+      bool is_silence = (silence_phones_.count(phone) != 0);
+      if (!is_silence) {
+        frames->push_back(frame);
+      }
+    }
+  }
+}
+
+
 }  // namespace kaldi
diff --git a/src/online2/online-ivector-feature.h b/src/online2/online-ivector-feature.h
@@ -482,9 +482,11 @@ class OnlineSilenceWeighting {
   // the decoder using its BestPathEnd() and related functions.
   // It will be instantiated for FST == fst::Fst<fst::StdArc> and fst::GrammarFst.
   template <typename FST>
-  void ComputeCurrentTraceback(const LatticeFasterOnlineDecoderTpl<FST> &decoder);
+  void ComputeCurrentTraceback(const LatticeFasterOnlineDecoderTpl<FST> &decoder,
+                               bool use_final_probs = false);
   template <typename FST>
-  void ComputeCurrentTraceback(const LatticeIncrementalOnlineDecoderTpl<FST> &decoder);
+  void ComputeCurrentTraceback(const LatticeIncrementalOnlineDecoderTpl<FST> &decoder,
+                               bool use_final_probs = false);
 
   // Calling this function gets the changes in weight that require us to modify
   // the stats... the output format is (frame-index, delta-weight).
@@ -522,6 +524,13 @@ class OnlineSilenceWeighting {
     GetDeltaWeights(num_frames_ready, 0, delta_weights);
   }
 
+  // Gets a list of nonsilence frames collected on traceback. Useful
+  // for algorithms to extract speaker properties like speaker identification
+  // vectors.
+  void GetNonsilenceFrames(
+      int32 num_frames_ready, int32 first_decoder_frame,
+      std::vector<int32> *frames);
+
  private:
   const TransitionModel &trans_model_;
   const OnlineSilenceWeightingConfig &config_;