-
Notifications
You must be signed in to change notification settings - Fork 27
/
libav.cpp
532 lines (479 loc) · 17.9 KB
/
libav.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
/**
* Copyright 2013-2014, Dominik Schnitzer <dominik@schnitzer.at>
* 2014-2016, Jan Schlueter <jan.schlueter@ofai.at>
*
* This file is part of Musly, a program for high performance music
* similarity computation: http://www.musly.org/.
*
* This Source Code Form is subject to the terms of the Mozilla
* Public License v. 2.0. If a copy of the MPL was not distributed
* with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#define __STDC_CONSTANT_MACROS
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <algorithm>
#include <vector>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#ifdef HAVE_AVUTIL_CHANNEL_LAYOUT
#include <libavutil/channel_layout.h>
#endif
}
#include "minilog.h"
#include "resampler.h"
#include "libav.h"
// We define some macros to be compatible with different libav versions
// without spreading #if and #else all over the place.
//
// AV_FRAME_ALLOC  - allocates an AVFrame
// AV_FRAME_UNREF  - resets a frame before it is reused for decoding
// AV_FRAME_FREE   - frees a frame; takes the address of the frame pointer
// AV_PACKET_UNREF - releases a packet obtained from av_read_frame()
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55, 45, 101)
// libavcodec older than 55.45.101: use the avcodec_* frame functions
#define AV_FRAME_ALLOC avcodec_alloc_frame
#define AV_FRAME_UNREF avcodec_get_frame_defaults
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(54, 28, 0)
// libavcodec older than 54.28.0: free the frame with plain av_free();
// the macro dereferences its argument so that call sites can always
// pass a pointer to the frame pointer
#define AV_FRAME_FREE(X) av_free(*(X))
#else
#define AV_FRAME_FREE avcodec_free_frame
#endif
#else
// libavcodec 55.45.101 and newer: use the av_frame_* functions
#define AV_FRAME_ALLOC av_frame_alloc
#define AV_FRAME_UNREF av_frame_unref
#define AV_FRAME_FREE av_frame_free
#endif
// Packet release function: av_free_packet before libavcodec 57.7.0,
// av_packet_unref from then on.
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(57, 7, 0)
#define AV_PACKET_UNREF av_free_packet
#else
#define AV_PACKET_UNREF av_packet_unref
#endif
namespace musly {
namespace decoders {
// NOTE(review): MUSLY_DECODER_REGIMPL is defined outside this file;
// presumably it registers the libav decoder class with Musly's decoder
// registry, and the second argument (0) is presumably a priority --
// confirm against the macro definition.
MUSLY_DECODER_REGIMPL(libav, 0);
/** Construct the decoder.
 *
 * With library versions older than 58.9.100 this calls the global
 * registration functions of libavformat (av_register_all) and
 * libavcodec (avcodec_register_all); with newer versions the
 * constructor does nothing.
 */
libav::libav()
{
// libavformat older than 58.9.100: explicit format registration
#if LIBAVFORMAT_VERSION_INT < AV_VERSION_INT(58, 9, 100)
av_register_all();
#endif
// libavcodec older than 58.9.100: explicit codec registration
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 9, 100)
avcodec_register_all();
#endif
}
/** Convert a strided run of audio samples to 32-bit floats.
 *
 * Integer formats are scaled by the reciprocal of their magnitude
 * (U8 is first re-centred around zero); float samples are copied and
 * double samples are narrowed to float.
 *
 * \param out         destination; one float is written every
 *                    \c out_stride bytes
 * \param in          source samples; one sample is read every
 *                    \c in_stride bytes
 * \param out_stride  distance in bytes between two output samples
 * \param in_stride   distance in bytes between two input samples
 * \param in_fmt      sample format of the input (packed or planar variant
 *                    of U8, S16, S32, FLT or DBL)
 * \param len         number of samples to convert; nothing is read or
 *                    written if it is zero or negative
 * \returns 0 on success, -1 if \c in_fmt is not one of the supported formats
 */
int
libav::samples_tofloat(
        void* const out,
        const void* const in,
        const int out_stride,
        const int in_stride,
        const AVSampleFormat in_fmt,
        int len)
{
    const int is = in_stride;
    const int os = out_stride;
    const uint8_t* pi = (const uint8_t*)in;
    uint8_t* po = (uint8_t*)out;
    // For len <= 0 this is not beyond po, so the loops below do not run.
    uint8_t* const end = po + os*len;

    // The loop tests the bound *before* writing, so an empty request never
    // touches the output buffer (which may not even be allocated then).
#define CONVFLOAT(ifmt, ifmtp, expr)\
    if (in_fmt == ifmt || in_fmt == ifmtp) {\
        for (; po < end; pi += is, po += os) {\
            *(float*)po = expr;\
        }\
    }

    // Implementation note: We could use av_get_packed_sample_fmt
    // to avoid checking for two formats each time, but we want to
    // stay compatible to libav versions that do not have it yet.
    CONVFLOAT(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_U8P,
            (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
    else CONVFLOAT(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16P,
            *(const int16_t*)pi*(1.0 / (1<<15)))
    else CONVFLOAT(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32P,
            *(const int32_t*)pi*(1.0 / (1U<<31)))
    else CONVFLOAT(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLTP,
            *(const float*)pi)
    else CONVFLOAT(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBLP,
            *(const double*)pi)
    else return -1;

    // keep the helper macro local to this function
#undef CONVFLOAT
    return 0;
}
/** Log callback handed to av_log_set_callback().
 *
 * Messages whose level exceeds the current libav log level are dropped.
 * When compiled as C++11 or newer, the message is formatted and forwarded
 * to MINILOG at trace level, prefixed with "libav: " and with one trailing
 * end-of-line character removed; otherwise it is printed directly to the
 * FileLogger stream.
 *
 * \param ptr    context pointer passed by libav (unused)
 * \param level  log level of the message
 * \param fmt    printf-style format string
 * \param vargs  arguments for \c fmt
 */
void libav_log_callback(void *ptr, int level, const char *fmt, va_list vargs)
{
    (void)ptr;
    if (level > av_log_get_level()) {
        return;
    }
#if __cplusplus > 199711L
    // A va_list may only be traversed once, so measure the message length
    // on a copy and keep the original for the actual formatting.
    va_list measure_args;
    va_copy(measure_args, vargs);
    const int len = vsnprintf(NULL, 0, fmt, measure_args);
    va_end(measure_args);
    if (len <= 0) {
        // formatting error or empty message: nothing to log
        return;
    }
    // Note: len does not include the terminating '\0' character.
    std::vector<char> buf(static_cast<size_t>(len) + 1);
    vsnprintf(&buf[0], buf.size(), fmt, vargs);
    // MINILOG messages are line-based: strip libav's end-of-line character,
    // but only if there is one (continuation fragments come without it).
    if (buf[len - 1] == '\n') {
        buf[len - 1] = '\0';
    }
    MINILOG(logTRACE) << "libav: " << &buf[0];
#else
    vfprintf(FileLogger::get_stream(), fmt, vargs);
#endif
}
/** Decode (an excerpt of) an audio file to mono float PCM at 22050 Hz.
 *
 * The best audio stream of the file is decoded, converted to float,
 * downmixed to mono if it is stereo, cut to the requested excerpt and
 * resampled to 22050 Hz if its sample rate differs.
 *
 * \param file            path of the audio file to decode
 * \param excerpt_length  length of the excerpt in seconds; if zero or
 *                        negative (or longer than a known file length)
 *                        the whole file is decoded
 * \param excerpt_start   start of the excerpt in seconds; if negative, the
 *                        excerpt is centered in the file, but starts at
 *                        -excerpt_start seconds the latest
 * \returns the decoded samples, or an empty vector if the file could not
 *          be opened or decoded, has no audio stream, or has an unsupported
 *          channel count (only mono and stereo are handled)
 */
std::vector<float>
libav::decodeto_22050hz_mono_float(
        const std::string& file,
        float excerpt_length,
        float excerpt_start)
{
    MINILOG(logTRACE) << "Decoding: " << file << " started.";
    const int target_rate = 22050;
    int avret;

    // show libav messages only in verbose mode
    if (MiniLog::current_level() >= logTRACE) {
        av_log_set_level(AV_LOG_VERBOSE);
        av_log_set_callback(libav_log_callback);
    }
    else {
        av_log_set_level(AV_LOG_PANIC);
    }

    // guess input format
    AVFormatContext* fmtx = NULL;
    avret = avformat_open_input(&fmtx, file.c_str(), NULL, NULL);
    if (avret < 0) {
        MINILOG(logERROR) << "Could not open file, or detect file format";
        return std::vector<float>(0);
    }

    // retrieve stream information
#ifdef _OPENMP
#pragma omp critical
#endif
    {
        avret = avformat_find_stream_info(fmtx, NULL);
    }
    if (avret < 0) {
        MINILOG(logERROR) << "Could not find stream info";
        avformat_close_input(&fmtx);
        return std::vector<float>(0);
    }

    // if there are multiple audio streams, find the best one..
    int audio_stream_idx =
            av_find_best_stream(fmtx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (audio_stream_idx < 0) {
        MINILOG(logERROR) << "Could not find audio stream in input file";
        avformat_close_input(&fmtx);
        return std::vector<float>(0);
    }
    AVStream *st = fmtx->streams[audio_stream_idx];

    // find a decoder for the stream
#if (LIBAVCODEC_VERSION_INT < AV_VERSION_INT(57, 14, 0)) || ((LIBAVCODEC_VERSION_MICRO >= 100) && (LIBAVCODEC_VERSION_INT < AV_VERSION_INT(57, 33, 100)))
    // old libav version (libavcodec < 57.14 for libav, < 57.33 for ffmpeg):
    // stream has a codec context we can use
    AVCodecContext *decx = st->codec;
    #define AVCODEC_FREE_CONTEXT(x)
#else
    // new libav version: need to create codec context for stream
    AVCodecParameters *decp = st->codecpar;
    AVCodecContext *decx = avcodec_alloc_context3(NULL);
    if (!decx) {
        MINILOG(logERROR) << "Could not allocate codec context";
        avformat_close_input(&fmtx);
        return std::vector<float>(0);
    }
    avret = avcodec_parameters_to_context(decx, decp);
    if (avret < 0) {
        MINILOG(logERROR) << "Could not set codec context";
        avcodec_free_context(&decx);
        avformat_close_input(&fmtx);
        return std::vector<float>(0);
    }
#if LIBAVCODEC_VERSION_MICRO >= 100
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58,3,102)
    // only available in ffmpeg, deprecated after 58
    av_codec_set_pkt_timebase(decx, st->time_base);
#endif
#endif
    #define AVCODEC_FREE_CONTEXT(x) avcodec_free_context(x)
#endif
    AVCodec *dec = avcodec_find_decoder(decx->codec_id);
    if (!dec) {
        MINILOG(logERROR) << "Could not find codec.";
        AVCODEC_FREE_CONTEXT(&decx);
        avformat_close_input(&fmtx);
        return std::vector<float>(0);
    }

    // open the decoder
    // (kindly ask for stereo downmix and floats, but not all decoders care)
    decx->request_channel_layout = AV_CH_LAYOUT_STEREO_DOWNMIX;
    decx->request_sample_fmt = AV_SAMPLE_FMT_FLT;
#ifdef _OPENMP
#pragma omp critical
#endif
    {
        avret = avcodec_open2(decx, dec, NULL);
    }
    if (avret < 0) {
        MINILOG(logERROR) << "Could not open codec.";
        AVCODEC_FREE_CONTEXT(&decx);
        avformat_close_input(&fmtx);
        return std::vector<float>(0);
    }

    // Currently only mono and stereo files are supported.
    if ((decx->channels != 1) && (decx->channels != 2)) {
        MINILOG(logWARNING) << "Unsupported number of channels: "
                << decx->channels;
        AVCODEC_FREE_CONTEXT(&decx);
        avformat_close_input(&fmtx);
        return std::vector<float>(0);
    }

    // allocate a frame
    AVFrame* frame = AV_FRAME_ALLOC();
    if (!frame) {
        MINILOG(logWARNING) << "Could not allocate frame";
        AVCODEC_FREE_CONTEXT(&decx);
        avformat_close_input(&fmtx);
        return std::vector<float>(0);
    }

    // allocate and initialize a packet
    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = NULL;
    pkt.size = 0;
    int got_frame = 0;

    // configuration
    const int input_stride = av_get_bytes_per_sample(decx->sample_fmt);
    const int num_planes = av_sample_fmt_is_planar(decx->sample_fmt) ? decx->channels : 1;
    const int output_stride = sizeof(float) * num_planes;
    int decode_samples;  // how many samples to decode; zero to decode all

    // A duration that is zero or negative (such as the "unknown" marker
    // used by libav) must not be mistaken for a known file length.
    if (st->duration > 0) {  // if the file length is (at least approximately) known:
        float file_length = (float)st->duration * st->time_base.num / st->time_base.den;
        MINILOG(logDEBUG) << "Audio file length: " << file_length << " seconds";

        // adjust excerpt boundaries
        if ((excerpt_length <= 0) || (excerpt_length > file_length)) {
            // use full file
            excerpt_length = 0;
            excerpt_start = 0;
        }
        else if (excerpt_start < 0) {
            // center in file, but start at -excerpt_start the latest
            excerpt_start = std::min(-excerpt_start,
                    (file_length - excerpt_length) / 2);
        }
        else if (excerpt_start + excerpt_length > file_length) {
            // right-align excerpt
            excerpt_start = file_length - excerpt_length;
        }
        MINILOG(logTRACE) << "Will decode from " << excerpt_start << " to " <<
                (excerpt_length > 0 ? (excerpt_start + excerpt_length) : file_length);

        // try to skip to requested position in stream
        // (Note: without AVSEEK_FLAG_BACKWARD, some MP3s cause libav to skip
        // to a position where it sees mono MP1 frames, causing a segmentation
        // fault when trying to access frame->data[i] for i > 0 further below)
        // The flags used to be written "AVSEEK_FLAG_BACKWARD || AVSEEK_FLAG_ANY",
        // a logical OR that evaluates to 1, i.e. AVSEEK_FLAG_BACKWARD alone.
        // That effective behaviour is kept; AVSEEK_FLAG_ANY was never in use.
        if ((excerpt_start > 0) && (av_seek_frame(fmtx, audio_stream_idx,
                    (int64_t)(excerpt_start * st->time_base.den / st->time_base.num),
                    AVSEEK_FLAG_BACKWARD) >= 0)) {
            // skipping went fine: decode only what's needed
            decode_samples = excerpt_length * decx->sample_rate;
            excerpt_start = 0;
            avcodec_flush_buffers(decx);
        }
        else {
            if (excerpt_start > 0) {
                MINILOG(logDEBUG) << "Could not seek in audio file.";
            }
            // skipping failed or not needed: decode from beginning
            decode_samples = (excerpt_start + excerpt_length) * decx->sample_rate;
        }
    }
    else {  // if the file length is unknown:
        MINILOG(logDEBUG) << "Audio file length: unknown";
        if (excerpt_length <= 0) {
            // use full file
            excerpt_length = 0;
            excerpt_start = 0;
            decode_samples = 0;
        }
        else if (excerpt_start < 0) {
            // center in file, but start at -excerpt_start the latest,
            // so decode at most -excerpt_start+excerpt_length seconds
            decode_samples = (-excerpt_start + excerpt_length) * decx->sample_rate;
        }
        else {
            // uncentered excerpt: decode from beginning, cut out afterwards
            decode_samples = (excerpt_start + excerpt_length) * decx->sample_rate;
        }
    }
    // After this lengthy adjustment, decode_samples tells us how many samples
    // to decode at most (where zero means to decode the full file), and
    // excerpt_start tells us up to how many seconds to cut from the beginning.

    // read packets
    const int channels = decx->channels;
    const int sample_rate = decx->sample_rate;
    std::vector<float> buffer;  // scratch space for one converted frame
    std::vector<float> decoded_pcm;
    int subsequent_errors = 0;
    const int subsequent_errors_max = 20;
    // Set when decoding must be given up; we then still run through the
    // common cleanup at the end (so that nothing leaks) and return nothing.
    bool aborted = false;
    // NOTE(review): the decoder is never drained with an empty packet, so
    // frames it still buffers at the end of the stream are not retrieved.
    while (!aborted &&
            ((decode_samples == 0) || ((int)decoded_pcm.size() < decode_samples)))
    {
        // skip all frames that are not part of the audio stream, and spurious
        // frames possibly found after seeking (wrong channels / sample_rate)
        while (((avret = av_read_frame(fmtx, &pkt)) >= 0)
                && ((pkt.stream_index != audio_stream_idx) ||
                    (decx->channels != channels) ||
                    (decx->sample_rate != sample_rate)))
        {
            AV_PACKET_UNREF(&pkt);
            MINILOG(logTRACE) << "Skipping frame...";
        }
        if (avret < 0) {
            // stop decoding if av_read_frame() failed
            AV_PACKET_UNREF(&pkt);
            break;
        }

        // remember the packet's extent: it is modified while being consumed
        // and has to be restored before the packet is released
        uint8_t* data = pkt.data;
        int size = pkt.size;
        while (pkt.size > 0) {
            // try to decode a frame
            AV_FRAME_UNREF(frame);
            int len = 0;
            got_frame = 0;
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(57, 48, 101)
            len = avcodec_decode_audio4(decx, frame, &got_frame, &pkt);
            if (len < 0) {
                avret = AVERROR(EINVAL);
            }
#else
            // first fetch a frame that may be pending from earlier input...
            avret = avcodec_receive_frame(decx, frame);
            if (avret == 0) {
                got_frame = 1;
            }
            if (avret == AVERROR(EAGAIN)) {
                avret = 0;
            }
            // ...then hand the packet to the decoder; if it is not accepted
            // yet (EAGAIN), len stays zero and we try again next round
            if (avret == 0) {
                avret = avcodec_send_packet(decx, &pkt);
                if (avret == 0) {
                    len = pkt.size;
                } else if (avret == AVERROR(EAGAIN)) {
                    avret = 0;
                }
            }
#endif
            if (avret < 0) {
                MINILOG(logWARNING) << "Error decoding an audio frame";
                // allow some frames to fail
                if (subsequent_errors < subsequent_errors_max) {
                    subsequent_errors++;
                    break;
                }
                // if too many frames failed decoding, abort
                MINILOG(logERROR) << "Too many errors, aborting.";
                aborted = true;
                break;
            } else {
                subsequent_errors = 0;
            }

            // if we got a (non-empty) frame
            if (got_frame && (frame->nb_samples > 0)) {
                // do we need to increase the buffer size?
                const int input_samples = frame->nb_samples*decx->channels;
                if ((size_t)input_samples > buffer.size()) {
                    buffer.resize(input_samples);
                }
                // convert samples to float
                // If we have planar samples (num_planes > 1), the channels
                // are stored in separate frame->data[i] arrays and we
                // convert to interleaved samples on the way.
                bool conversion_failed = false;
                for (int i = 0; i < num_planes; i++) {
                    if (samples_tofloat(&buffer[0] + i, frame->data[i],
                            output_stride, input_stride,
                            decx->sample_fmt,
                            input_samples / num_planes) < 0) {
                        conversion_failed = true;
                        break;
                    }
                }
                if (conversion_failed) {
                    MINILOG(logERROR) << "Strange sample format. Abort.";
                    aborted = true;
                    break;
                }
                // inplace downmix to mono, if required
                if (decx->channels == 2) {
                    for (int i = 0; i < frame->nb_samples; i++) {
                        buffer[i] = (buffer[i*2] + buffer[i*2+1]) / 2.0f;
                    }
                }
                // store raw pcm data
                decoded_pcm.insert(decoded_pcm.end(), buffer.begin(),
                        buffer.begin() + frame->nb_samples);
            }
            else if (!got_frame && (len == 0)) {
                // the decoder neither consumed input nor produced output:
                // drop the rest of this packet instead of spinning forever
                break;
            }

            // consume the packet
            pkt.data += len;
            pkt.size -= len;
        }
        pkt.data = data;
        pkt.size = size;
        AV_PACKET_UNREF(&pkt);
    }

    std::vector<float> pcm;
    if (!aborted) {
        MINILOG(logTRACE) << "Decoding loop finished.";

        // cut out the requested excerpt if needed
        int skip_samples = 0;
        if (excerpt_start < 0) {
            // center excerpt, but start at -excerpt_start the latest
            // (floating-point division: do not truncate to whole seconds)
            float file_length = (float)decoded_pcm.size() / decx->sample_rate;
            if (file_length > excerpt_length) {
                // skip beginning as needed
                excerpt_start = std::min(-excerpt_start,
                        (file_length - excerpt_length) / 2);
                skip_samples = excerpt_start * decx->sample_rate;
                // truncate end if needed
                int target_samples = skip_samples + excerpt_length * decx->sample_rate;
                if (target_samples < (int)decoded_pcm.size()) {
                    decoded_pcm.resize(target_samples);
                }
            }
        }
        else if (excerpt_length > 0) {
            // truncate to target length if needed
            if ((int)decoded_pcm.size() > decode_samples) {
                decoded_pcm.resize(decode_samples);
            }
            // skip beginning if needed
            if (excerpt_start > 0) {
                skip_samples = excerpt_start * decx->sample_rate;
                int missed_samples = decode_samples - (int)decoded_pcm.size();
                skip_samples = std::max(0, skip_samples - missed_samples);
            }
        }
        // never skip more than we have (the sizes below are unsigned)
        if (skip_samples > (int)decoded_pcm.size()) {
            skip_samples = (int)decoded_pcm.size();
        }

        // do we need to resample?
        if (target_rate != decx->sample_rate) {
            MINILOG(logTRACE) << "Resampling signal. input="
                    << decx->sample_rate << ", target=" << target_rate;
            resampler r(decx->sample_rate, target_rate);
            pcm = r.resample(decoded_pcm.data() + skip_samples, decoded_pcm.size() - skip_samples);
            MINILOG(logTRACE) << "Resampling finished.";
        } else {
            pcm.resize(decoded_pcm.size() - skip_samples);
            std::copy(decoded_pcm.begin() + skip_samples, decoded_pcm.end(), pcm.begin());
        }
    }

    // cleanup (shared by the regular and the aborted case)
    AV_FRAME_FREE(&frame);
#ifdef _OPENMP
#pragma omp critical
#endif
    {
        avcodec_close(decx);
        AVCODEC_FREE_CONTEXT(&decx);
        avformat_close_input(&fmtx);
    }
    if (!aborted) {
        MINILOG(logTRACE) << "Decoding: " << file << " finalized.";
    }
    return pcm;
}
} /* namespace decoders */
} /* namespace musly */