Skip to content

Commit

Permalink
Implement a method to get nonsilence frames
Browse files Browse the repository at this point in the history
  • Loading branch information
nshmyrev committed May 31, 2020
1 parent 6fa1949 commit b087e2b
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 10 deletions.
58 changes: 50 additions & 8 deletions src/online2/online-ivector-feature.cc
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,8 @@ OnlineSilenceWeighting::OnlineSilenceWeighting(

template <typename FST>
void OnlineSilenceWeighting::ComputeCurrentTraceback(
const LatticeFasterOnlineDecoderTpl<FST> &decoder) {
const LatticeFasterOnlineDecoderTpl<FST> &decoder,
bool use_final_probs) {
int32 num_frames_decoded = decoder.NumFramesDecoded(),
num_frames_prev = frame_info_.size();
// note, num_frames_prev is not the number of frames previously decoded,
Expand All @@ -495,7 +496,6 @@ void OnlineSilenceWeighting::ComputeCurrentTraceback(
if (num_frames_decoded == 0)
return;
int32 frame = num_frames_decoded - 1;
bool use_final_probs = false;
typename LatticeFasterOnlineDecoderTpl<FST>::BestPathIterator iter =
decoder.BestPathEnd(use_final_probs, NULL);
while (frame >= 0) {
Expand Down Expand Up @@ -530,7 +530,8 @@ void OnlineSilenceWeighting::ComputeCurrentTraceback(

template <typename FST>
void OnlineSilenceWeighting::ComputeCurrentTraceback(
const LatticeIncrementalOnlineDecoderTpl<FST> &decoder) {
const LatticeIncrementalOnlineDecoderTpl<FST> &decoder,
bool use_final_probs) {
int32 num_frames_decoded = decoder.NumFramesDecoded(),
num_frames_prev = frame_info_.size();
// note, num_frames_prev is not the number of frames previously decoded,
Expand All @@ -545,7 +546,6 @@ void OnlineSilenceWeighting::ComputeCurrentTraceback(
if (num_frames_decoded == 0)
return;
int32 frame = num_frames_decoded - 1;
bool use_final_probs = false;
typename LatticeIncrementalOnlineDecoderTpl<FST>::BestPathIterator iter =
decoder.BestPathEnd(use_final_probs, NULL);
while (frame >= 0) {
Expand Down Expand Up @@ -582,16 +582,20 @@ void OnlineSilenceWeighting::ComputeCurrentTraceback(
// Instantiate the template OnlineSilenceWeighting::ComputeCurrentTraceback().
template
void OnlineSilenceWeighting::ComputeCurrentTraceback<fst::Fst<fst::StdArc> >(
const LatticeFasterOnlineDecoderTpl<fst::Fst<fst::StdArc> > &decoder);
const LatticeFasterOnlineDecoderTpl<fst::Fst<fst::StdArc> > &decoder,
bool use_final_probs);
template
void OnlineSilenceWeighting::ComputeCurrentTraceback<fst::GrammarFst>(
const LatticeFasterOnlineDecoderTpl<fst::GrammarFst> &decoder);
const LatticeFasterOnlineDecoderTpl<fst::GrammarFst> &decoder,
bool use_final_probs);
template
void OnlineSilenceWeighting::ComputeCurrentTraceback<fst::Fst<fst::StdArc> >(
const LatticeIncrementalOnlineDecoderTpl<fst::Fst<fst::StdArc> > &decoder);
const LatticeIncrementalOnlineDecoderTpl<fst::Fst<fst::StdArc> > &decoder,
bool use_final_probs);
template
void OnlineSilenceWeighting::ComputeCurrentTraceback<fst::GrammarFst>(
const LatticeIncrementalOnlineDecoderTpl<fst::GrammarFst> &decoder);
const LatticeIncrementalOnlineDecoderTpl<fst::GrammarFst> &decoder,
bool use_final_probs);


void OnlineSilenceWeighting::GetDeltaWeights(
Expand Down Expand Up @@ -700,4 +704,42 @@ void OnlineSilenceWeighting::GetDeltaWeights(
}
}

void OnlineSilenceWeighting::GetNonsilenceFrames(
int32 num_frames_ready, int32 first_decoder_frame,
std::vector<int32> *frames) {
// num_frames_ready is at the feature frame-rate, most of the code
// in this function is at the decoder frame-rate.
// round up, so we are sure to get weights for at least the frame
// 'num_frames_ready - 1', and maybe one or two frames afterward.
KALDI_ASSERT(num_frames_ready > first_decoder_frame || num_frames_ready == 0);
int32 fs = frame_subsampling_factor_,
num_decoder_frames_ready = (num_frames_ready - first_decoder_frame + fs - 1) / fs;

frames->clear();

int32 prev_num_frames_processed = frame_info_.size();
if (frame_info_.size() < static_cast<size_t>(num_decoder_frames_ready))
frame_info_.resize(num_decoder_frames_ready);

// Don't go further backward into the past then 500 frames before the most
// recent frame
int32 begin_frame = std::max<int32>(0, prev_num_frames_processed - 500),
frames_out = static_cast<int32>(frame_info_.size()) - begin_frame;
// frames_out is the number of frames we will output.
KALDI_ASSERT(frames_out >= 0);

for (int32 offset = 0; offset < frames_out; offset++) {
int32 frame = begin_frame + offset;
int32 transition_id = frame_info_[frame].transition_id;
if (transition_id != -1) {
int32 phone = trans_model_.TransitionIdToPhone(transition_id);
bool is_silence = (silence_phones_.count(phone) != 0);
if (!is_silence) {
frames->push_back(frame);
}
}
}
}


} // namespace kaldi
13 changes: 11 additions & 2 deletions src/online2/online-ivector-feature.h
Original file line number Diff line number Diff line change
Expand Up @@ -482,9 +482,11 @@ class OnlineSilenceWeighting {
// the decoder using its BestPathEnd() and related functions.
// It will be instantiated for FST == fst::Fst<fst::StdArc> and fst::GrammarFst.
template <typename FST>
void ComputeCurrentTraceback(const LatticeFasterOnlineDecoderTpl<FST> &decoder);
void ComputeCurrentTraceback(const LatticeFasterOnlineDecoderTpl<FST> &decoder,
bool use_final_probs = false);
template <typename FST>
void ComputeCurrentTraceback(const LatticeIncrementalOnlineDecoderTpl<FST> &decoder);
void ComputeCurrentTraceback(const LatticeIncrementalOnlineDecoderTpl<FST> &decoder,
bool use_final_probs = false);

// Calling this function gets the changes in weight that require us to modify
// the stats... the output format is (frame-index, delta-weight).
Expand Down Expand Up @@ -522,6 +524,13 @@ class OnlineSilenceWeighting {
GetDeltaWeights(num_frames_ready, 0, delta_weights);
}

// Gets a list of nonsilence frames collected on traceback. Useful
// for algorithms to extract speaker properties like speaker identification
// vectors.
void GetNonsilenceFrames(
int32 num_frames_ready, int32 first_decoder_frame,
std::vector<int32> *frames);

private:
const TransitionModel &trans_model_;
const OnlineSilenceWeightingConfig &config_;
Expand Down

0 comments on commit b087e2b

Please sign in to comment.