/
algorithm_stream.h
197 lines (177 loc) · 11.3 KB
/
algorithm_stream.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
/*
* ESPRESSIF MIT License
*
* Copyright (c) 2019 <ESPRESSIF SYSTEMS (SHANGHAI) CO., LTD>
*
* Permission is hereby granted for use on all ESPRESSIF SYSTEMS products, in which case,
* it is free of charge, to any person obtaining a copy of this software and associated
* documentation files (the "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the Software is furnished
* to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or
* substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef _ALGORITHM_STREAM_H_
#define _ALGORITHM_STREAM_H_
#include "audio_element.h"
#ifdef __cplusplus
extern "C" {
#endif
#define ALGORITHM_STREAM_PINNED_TO_CORE 0
#define ALGORITHM_STREAM_TASK_PERIOD 21
#define ALGORITHM_STREAM_RINGBUFFER_SIZE 1024
#define ALGORITHM_STREAM_TASK_STACK_SIZE (5 * 1024)
#define ALGORITHM_STREAM_DEFAULT_SAMPLE_RATE_HZ 8000
#define ALGORITHM_STREAM_DEFAULT_SAMPLE_BIT 16
#define ALGORITHM_STREAM_DEFAULT_MIC_CHANNELS 1
#define ALGORITHM_STREAM_DEFAULT_AGC_GAIN_DB 5
/*
// AEC: Acoustic Echo Cancellation
// AGC: Automatic Gain Control
// WWE: Wake Word Engine
// NS: Noise Suppression
+-----------+
| |
| TYPE 1 |
| |
+--------------------------------------+-----------+----------------------------------------+
| |
| reference signal |
+-----------+ | +-----------+ +-----------\ +-----------+ +-----------+ +-----------+ |
| | | | | | \ | | | | | | |
| I2S read |-------|->| Resample |--->| Data split |--->| AEC |--->| NS |--->| AGC | |
| | | | | | / | | | | | | |
+-----------+ | +-----------+ +-----------/ +-----------+ +-----------+ +-----------+ |
| record signal |
| |
+-------------------------------------------------------------------------------------------+
+-----------+
| |
| TYPE 2 |
| |
+--------------------------------------+-----------+----------------------------------------+
| |
| |
+-----------+ | +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ |
| | | | | | | | | | | | | |
| I2S read |-------|->| Resample |--->| rec signal|--->| AEC |--->| NS |--->| AGC | |
| | | | | | | | | | | | | |
+-----------+ | +-----------+ +-----------+ +-----^-----+ +-----------+ +-----------+ |
| | |
+-----------+ | +-----------+ +-----------+ | |
| | | | | | | | |
| input_rb |-------|->| Resample |--->| ref signal|----------+ |
| | | | | | | |
+-----------+ | +-----------+ +-----------+ |
| |
+-------------------------------------------------------------------------------------------+
*/
/**
 * @brief Two types of algorithm stream input method
 *
 * Type 1 is the default used by the mini-board: the reference signal and the
 * recording signal are read from the left channel and the right channel,
 * respectively, of the same I2S.
 *
 * Type 2 (see the TYPE 2 diagram above): the recording signal and the reference
 * signal are both supplied by the user. By default the recording signal is
 * fed by the I2S element, and the reference signal must be fed to the
 * algorithm element through its multiple-input buffer. With type 2 the
 * algorithm modules can be combined arbitrarily; select them with the
 * `algo_mask` field of the stream configuration.
 */
typedef enum {
ALGORITHM_STREAM_INPUT_TYPE1 = 0, /*!< Reference and recording signals come from the left and right channels of the same I2S */
ALGORITHM_STREAM_INPUT_TYPE2 = 1, /*!< Recording signal and reference signal are supplied separately by the user */
} algorithm_stream_input_type_t; /*!< Selected through the `input_type` field of algorithm_stream_cfg_t */
/**
 * @brief Choose the algorithm to be used
 *
 * Every enumerator occupies its own bit, so several algorithms can be selected
 * at once by OR-ing the values together (ALGORITHM_STREAM_DEFAULT_MASK does
 * exactly that for AEC and NS).
 */
typedef enum {
ALGORITHM_STREAM_USE_AEC = (0x1 << 0), /*!< Use AEC (Acoustic Echo Cancellation) */
ALGORITHM_STREAM_USE_AGC = (0x1 << 1), /*!< Use AGC (Automatic Gain Control) */
ALGORITHM_STREAM_USE_NS = (0x1 << 2), /*!< Use NS (Noise Suppression) */
ALGORITHM_STREAM_USE_VAD = (0x1 << 3) /*!< Use VAD (presumably Voice Activity Detection; the acronym is not expanded in this header -- confirm) */
} algorithm_stream_mask_t;
/**
 * @brief Algorithm stream configurations
 *
 * ALGORITHM_STREAM_CFG_DEFAULT() expands to an initializer that assigns a
 * default value to every field of this structure.
 */
typedef struct {
algorithm_stream_input_type_t input_type; /*!< Input type of stream */
int task_stack; /*!< Task stack size */
int task_prio; /*!< Task priority */
int task_core; /*!< The core on which the task is created */
int out_rb_size; /*!< Size of output ringbuffer */
bool stack_in_ext; /*!< Try to allocate stack in external memory */
int rec_linear_factor; /*!< The linear amplification factor of the recording signal */
int ref_linear_factor; /*!< The linear amplification factor of the reference signal */
bool debug_input; /*!< Debug the algorithm input data */
bool swap_ch; /*!< Swap left and right channels */
int8_t algo_mask; /*!< Algorithms to use: bitwise OR of algorithm_stream_mask_t values */
int sample_rate; /*!< The sampling rate of the input PCM (in Hz) */
int mic_ch; /*!< Number of MIC channels */
int agc_gain; /*!< AGC gain (dB) for voice communication */
bool aec_low_cost; /*!< AEC uses less CPU and RAM resources,
but has poorer suppression of nonlinear distortion */
} algorithm_stream_cfg_t;
/** Default algorithm selection: AEC and NS enabled; AGC and VAD are not part of the mask. */
#define ALGORITHM_STREAM_DEFAULT_MASK (ALGORITHM_STREAM_USE_AEC | ALGORITHM_STREAM_USE_NS)
/**
 * @brief Default initializer for algorithm_stream_cfg_t
 *
 * Expands to a brace-enclosed designated initializer, e.g.
 *     algorithm_stream_cfg_t cfg = ALGORITHM_STREAM_CFG_DEFAULT();
 *
 * The fields are listed in the same order as they are declared in
 * algorithm_stream_cfg_t. Defaults: type 1 input, stack requested in external
 * memory, both linear factors set to 1, input debugging and channel swapping
 * disabled, ALGORITHM_STREAM_DEFAULT_MASK as algorithm selection, and
 * low-cost AEC disabled; the remaining fields take the ALGORITHM_STREAM_*
 * default macros.
 */
#define ALGORITHM_STREAM_CFG_DEFAULT() { \
.input_type = ALGORITHM_STREAM_INPUT_TYPE1, \
.task_stack = ALGORITHM_STREAM_TASK_STACK_SIZE, \
.task_prio = ALGORITHM_STREAM_TASK_PERIOD, \
.task_core = ALGORITHM_STREAM_PINNED_TO_CORE, \
.out_rb_size = ALGORITHM_STREAM_RINGBUFFER_SIZE, \
.stack_in_ext = true, \
.rec_linear_factor = 1, \
.ref_linear_factor = 1, \
.debug_input = false, \
.swap_ch = false, \
.algo_mask = ALGORITHM_STREAM_DEFAULT_MASK, \
.sample_rate = ALGORITHM_STREAM_DEFAULT_SAMPLE_RATE_HZ, \
.mic_ch = ALGORITHM_STREAM_DEFAULT_MIC_CHANNELS, \
.agc_gain = ALGORITHM_STREAM_DEFAULT_AGC_GAIN_DB, \
.aec_low_cost = false, \
}
/**
 * @brief Initialize algorithm stream
 *
 * @param config The algorithm stream configuration (an algorithm_stream_cfg_t;
 *               ALGORITHM_STREAM_CFG_DEFAULT() provides default values)
 *
 * @return The audio element handle
 *
 * NOTE(review): the behaviour for a NULL `config` and the value returned on
 * failure are not visible from this header -- confirm against the implementation.
 */
audio_element_handle_t algo_stream_init(algorithm_stream_cfg_t *config);
/**
 * @brief Set the delay of the playback signal or the recording signal when input type 2 is used
 *
 * @note The AEC internal buffering mechanism requires that the recording signal
 *       is delayed by around 0 - 10 ms compared to the corresponding reference (playback) signal.
 *
 * @param el        Handle of element
 * @param ringbuf   Handle of ringbuf (presumably the ring buffer carrying the signal to be delayed -- confirm)
 * @param delay_ms  The delay between playback and recording in ms.
 *                  To measure this delay yourself, set the configuration field `debug_input` to true,
 *                  capture the original input data (left channel is the signal captured from the microphone,
 *                  right channel is the signal played to the speaker),
 *                  and check the delay with an audio analysis tool.
 *
 * @return
 *     - ESP_OK
 *     - ESP_FAIL
 *     - ESP_ERR_INVALID_ARG
 *
 * NOTE(review): the declared return type is audio_element_err_t, while the values
 * listed above are ESP_* error codes -- confirm which set the implementation returns.
 */
audio_element_err_t algo_stream_set_delay(audio_element_handle_t el, ringbuf_handle_t ringbuf, int delay_ms);
/**
 * @brief Fix I2S mono noise issue
 *
 * @note This API is only for ESP32 with 16-bit I2S data
 *
 * @param sbuff I2S data buffer
 * @param len   I2S data length (presumably in bytes, since the buffer is a uint8_t pointer -- confirm)
 *
 * @return ESP_OK
 */
esp_err_t algorithm_mono_fix(uint8_t *sbuff, uint32_t len);
#ifdef __cplusplus
}
#endif
#endif