-
Notifications
You must be signed in to change notification settings - Fork 34
/
feature-spectrogram.h
92 lines (68 loc) · 2.64 KB
/
feature-spectrogram.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
// kaldifeat/csrc/feature-spectrogram.h
//
// Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
// This file is copied/modified from kaldi/src/feat/feature-spectrogram.h
#ifndef KALDIFEAT_CSRC_FEATURE_SPECTROGRAM_H_
#define KALDIFEAT_CSRC_FEATURE_SPECTROGRAM_H_
#include <sstream>
#include <string>

#include "kaldifeat/csrc/feature-common.h"
#include "kaldifeat/csrc/feature-window.h"
#include "torch/script.h"
namespace kaldifeat {
// Configuration for spectrogram feature extraction.
//
// All members are public and carry in-class defaults, so a
// default-constructed SpectrogramOptions is directly usable.
struct SpectrogramOptions {
  // Options forwarded to the frame extraction stage.
  FrameExtractionOptions frame_opts;

  // Floor on energy (absolute, not relative) in Spectrogram
  // computation. Caution: this floor is applied to the
  // zeroth component, representing the total signal energy.
  // The floor on the individual spectrogram elements is fixed at
  // std::numeric_limits<float>::epsilon()
  //
  // Written as a float literal so that no double -> float conversion is
  // involved in the initializer.
  float energy_floor = 0.0f;

  // If true, compute energy before preemphasis and windowing
  bool raw_energy = true;

  // If true, return raw FFT complex numbers instead of log magnitudes
  // Not implemented yet
  bool return_raw_fft = false;

  // Device associated with these options; defaults to "cpu".
  torch::Device device{"cpu"};

  // Returns a one-line, human-readable description of every option in the
  // form "SpectrogramOptions(name=value, ...)". Booleans are rendered as
  // "True"/"False" and the device is wrapped in double quotes.
  //
  // Requires <sstream> for std::ostringstream.
  std::string ToString() const {
    std::ostringstream os;
    os << "SpectrogramOptions(";
    os << "frame_opts=" << frame_opts.ToString() << ", ";
    os << "energy_floor=" << energy_floor << ", ";
    os << "raw_energy=" << (raw_energy ? "True" : "False") << ", ";
    os << "return_raw_fft=" << (return_raw_fft ? "True" : "False") << ", ";
    os << "device=\"" << device << "\")";
    return os.str();
  }
};
// Stream-insertion operator for SpectrogramOptions.
// Only declared in this header; the definition is not visible here.
// NOTE(review): presumably it writes opts.ToString() to os and returns os --
// confirm against the definition.
std::ostream &operator<<(std::ostream &os, const SpectrogramOptions &opts);
// Per-frame spectrogram computer. It keeps its own copy of the options it
// was constructed with and is plugged into OfflineFeatureTpl via the
// `Spectrogram` alias defined in this header.
class SpectrogramComputer {
 public:
  using Options = SpectrogramOptions;

  explicit SpectrogramComputer(const SpectrogramOptions &opts);
  ~SpectrogramComputer() = default;

  // Full set of options held by this computer.
  const SpectrogramOptions &GetOptions() const { return opts_; }

  // Frame-extraction part of the held options.
  const FrameExtractionOptions &GetFrameOptions() const {
    return opts_.frame_opts;
  }

  // Feature dimension reported by this computer: the padded window size
  // when return_raw_fft is set, otherwise half the padded window size
  // plus one.
  int32_t Dim() const {
    const auto padded_size = opts_.frame_opts.PaddedWindowSize();
    return opts_.return_raw_fft ? padded_size : padded_size / 2 + 1;
  }

  // Mirrors the raw_energy option.
  bool NeedRawLogEnergy() const { return opts_.raw_energy; }

  // signal_raw_log_energy is log_energy_pre_window; it is non-empty
  // if and only if NeedRawLogEnergy() returns true.
  //
  // vtln_warp is not used by this function; the parameter exists only
  // so the signature stays compatible with the common computer interface.
  torch::Tensor Compute(torch::Tensor signal_raw_log_energy, float vtln_warp,
                        const torch::Tensor &signal_frame);

 private:
  SpectrogramOptions opts_;
  float log_energy_floor_;
};
// Convenience alias: OfflineFeatureTpl instantiated with SpectrogramComputer.
// NOTE(review): OfflineFeatureTpl is presumably declared in
// kaldifeat/csrc/feature-common.h -- confirm.
using Spectrogram = OfflineFeatureTpl<SpectrogramComputer>;
} // namespace kaldifeat
#endif // KALDIFEAT_CSRC_FEATURE_SPECTROGRAM_H_