-
-
Notifications
You must be signed in to change notification settings - Fork 339
/
AVCodecDecoder.cpp
1830 lines (1519 loc) · 59.9 KB
/
AVCodecDecoder.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (C) 2001 Carlos Hasan
* Copyright (C) 2001 François Revol
* Copyright (C) 2001 Axel Dörfler
* Copyright (C) 2004 Marcus Overhagen
* Copyright (C) 2009 Stephan Amßus <superstippi@gmx.de>
* Copyright (C) 2014 Colin Günther <coling@gmx.de>
* Copyright (C) 2015 Adrien Destugues <pulkomandy@pulkomandy.tk>
*
* All rights reserved. Distributed under the terms of the MIT License.
*/
//! libavcodec based decoder for Haiku
#include "AVCodecDecoder.h"
#include <new>
#include <assert.h>
#include <string.h>
#include <Bitmap.h>
#include <Debug.h>
#include <String.h>
#include "Utilities.h"
#undef TRACE
//#define TRACE_AV_CODEC
#ifdef TRACE_AV_CODEC
# define TRACE(x...) printf(x)
# define TRACE_AUDIO(x...) printf(x)
# define TRACE_VIDEO(x...) printf(x)
#else
# define TRACE(x...)
# define TRACE_AUDIO(x...)
# define TRACE_VIDEO(x...)
#endif
//#define LOG_STREAM_TO_FILE
#ifdef LOG_STREAM_TO_FILE
# include <File.h>
static BFile sAudioStreamLogFile(
"/boot/home/Desktop/AVCodecDebugAudioStream.raw",
B_CREATE_FILE | B_ERASE_FILE | B_WRITE_ONLY);
static BFile sVideoStreamLogFile(
"/boot/home/Desktop/AVCodecDebugVideoStream.raw",
B_CREATE_FILE | B_ERASE_FILE | B_WRITE_ONLY);
static int sDumpedPackets = 0;
#endif
// Alias FFmpeg's AVCodecID under the legacy CodecID name used below.
typedef AVCodecID CodecID;

// Layout of the WAVEFORMATEX-style header that precedes the codec specific
// extra data in B_WAV_FORMAT_FAMILY info buffers (see Setup()). Packed so it
// can be overlaid directly onto the raw info buffer.
struct wave_format_ex {
	uint16 format_tag;
	uint16 channels;
	uint32 frames_per_sec;
	uint32 avg_bytes_per_sec;
	uint16 block_align;
	uint16 bits_per_sample;
	uint16 extra_size;
	// extra_data[extra_size]
} _PACKED;

// Audio properties of the first decoded frame currently accumulated in
// fRawDecodedAudio; stored in fRawDecodedAudio->opaque (see
// _ResetRawDecodedAudio() and the frame-moving code below).
struct avformat_codec_context {
	int sample_rate;
	int channels;
};

// profiling related globals
#define DO_PROFILING 0
#if DO_PROFILING
static bigtime_t decodingTime = 0;
static bigtime_t conversionTime = 0;
static long profileCounter = 0;
#endif
/*!	\brief Creates an uninitialized decoder.

	The actual FFmpeg codec is looked up later in Setup() and opened during
	format negotiation; here only the context and the scratch frames are
	allocated.
*/
AVCodecDecoder::AVCodecDecoder()
	:
	fHeader(),
	fInputFormat(),
	fFrame(0),
	fIsAudio(false),
	fCodec(NULL),
	// Allocated without a codec; avcodec_open2() attaches one later.
	fCodecContext(avcodec_alloc_context3(NULL)),
	fResampleContext(NULL),
	fDecodedData(NULL),
	fDecodedDataSizeInBytes(0),
	fPostProcessedDecodedPicture(av_frame_alloc()),
	fRawDecodedPicture(av_frame_alloc()),
	fRawDecodedAudio(av_frame_alloc()),
	fCodecInitDone(false),
#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
	fSwsContext(NULL),
#else
	fFormatConversionFunc(NULL),
#endif
	fExtraData(NULL),
	fExtraDataSize(0),
	fBlockAlign(0),
	fOutputColorSpace(B_NO_COLOR_SPACE),
	fOutputFrameCount(0),
	fOutputFrameRate(1.0),
	fOutputFrameSize(0),
	fInputFrameSize(0),
	fChunkBuffer(NULL),
	fChunkBufferSize(0),
	fAudioDecodeError(false),
	fDecodedDataBuffer(av_frame_alloc()),
	fDecodedDataBufferOffset(0),
	fDecodedDataBufferSize(0),
	fBufferSinkContext(NULL),
	fBufferSourceContext(NULL),
	fFilterGraph(NULL),
	fFilterFrame(NULL)
{
	TRACE("AVCodecDecoder::AVCodecDecoder()\n");

	system_info info;
	get_system_info(&info);

	// Be tolerant towards broken streams: detect errors carefully, enable
	// error concealment, and decode with one thread per CPU.
	fCodecContext->err_recognition = AV_EF_CAREFUL;
	fCodecContext->error_concealment = 3;
	fCodecContext->thread_count = info.cpu_count;
}
/*!	\brief Releases the codec, all scratch frames and internal buffers.
*/
AVCodecDecoder::~AVCodecDecoder()
{
	TRACE("[%c] AVCodecDecoder::~AVCodecDecoder()\n", fIsAudio?('a'):('v'));

#if DO_PROFILING
	if (profileCounter > 0) {
		printf("[%c] profile: d1 = %lld, d2 = %lld (%lld)\n",
			fIsAudio?('a'):('v'), decodingTime / profileCounter,
			conversionTime / profileCounter, fFrame);
	}
#endif

	if (fCodecInitDone)
		avcodec_close(fCodecContext);

	swr_free(&fResampleContext);
	free(fChunkBuffer);
	free(fDecodedData);

	// Frames allocated with av_frame_alloc() must be released with
	// av_frame_free(): it unrefs any data buffers still referenced by the
	// frame before freeing the frame itself. Plain av_free() would leak
	// those buffers.
	av_frame_free(&fPostProcessedDecodedPicture);
	av_frame_free(&fRawDecodedPicture);

	// fRawDecodedAudio->opaque is av_realloc()ed by us (see
	// _NegotiateAudioOutputFormat()) and not reference counted, so it has
	// to be freed manually before releasing the frame.
	av_free(fRawDecodedAudio->opaque);
	av_frame_free(&fRawDecodedAudio);

	// Only the context struct itself is freed here; its extradata points
	// into fExtraData (or fInputFormat's meta data), which we own and
	// release below.
	av_free(fCodecContext);

	av_frame_free(&fDecodedDataBuffer);

	av_frame_free(&fFilterFrame);
	avfilter_graph_free(&fFilterGraph);

#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
	if (fSwsContext != NULL)
		sws_freeContext(fSwsContext);
#endif

	delete[] fExtraData;
}
/*!	\brief Fills \a mci with the name and id of the FFmpeg codec selected
	by Setup().
*/
void
AVCodecDecoder::GetCodecInfo(media_codec_info* mci)
{
	// Expose the FFmpeg codec id through sub_id; id itself is unused.
	mci->id = 0;
	mci->sub_id = fCodec->id;
	snprintf(mci->short_name, 32, "%s", fCodec->name);
	snprintf(mci->pretty_name, 96, "%s", fCodec->long_name);
}
/*!	\brief Selects the FFmpeg decoder matching \a ioEncodedFormat and stores
	the codec specific extra data found in \a infoBuffer.

	\param ioEncodedFormat The encoded media format; must be of type
		B_MEDIA_ENCODED_AUDIO or B_MEDIA_ENCODED_VIDEO.
	\param infoBuffer Codec specific info; for B_WAV_FORMAT_FAMILY it starts
		with a wave_format_ex header followed by the actual extra data.
	\param infoSize Size of \a infoBuffer in bytes.
	\returns B_OK on success, B_NOT_SUPPORTED for non-'ffmp' formats,
		B_ERROR otherwise.
*/
status_t
AVCodecDecoder::Setup(media_format* ioEncodedFormat, const void* infoBuffer,
	size_t infoSize)
{
	if (ioEncodedFormat->type != B_MEDIA_ENCODED_AUDIO
		&& ioEncodedFormat->type != B_MEDIA_ENCODED_VIDEO)
		return B_ERROR;

	fIsAudio = (ioEncodedFormat->type == B_MEDIA_ENCODED_AUDIO);
	TRACE("[%c] AVCodecDecoder::Setup()\n", fIsAudio?('a'):('v'));

#ifdef TRACE_AV_CODEC
	char buffer[1024];
	string_for_format(*ioEncodedFormat, buffer, sizeof(buffer));
	TRACE("[%c] input_format = %s\n", fIsAudio?('a'):('v'), buffer);
	TRACE("[%c] infoSize = %ld\n", fIsAudio?('a'):('v'), infoSize);
	TRACE("[%c] user_data_type = %08lx\n", fIsAudio?('a'):('v'),
		ioEncodedFormat->user_data_type);
	TRACE("[%c] meta_data_size = %ld\n", fIsAudio?('a'):('v'),
		ioEncodedFormat->MetaDataSize());
#endif

	media_format_description description;
	if (BMediaFormats().GetCodeFor(*ioEncodedFormat,
			B_MISC_FORMAT_FAMILY, &description) == B_OK) {
		if (description.u.misc.file_format != 'ffmp')
			return B_NOT_SUPPORTED;
		fCodec = avcodec_find_decoder(static_cast<CodecID>(
			description.u.misc.codec));
		if (fCodec == NULL) {
			TRACE(" unable to find the correct FFmpeg "
				"decoder (id = %lu)\n", description.u.misc.codec);
			return B_ERROR;
		}
		TRACE(" found decoder %s\n", fCodec->name);

		const void* extraData = infoBuffer;
		fExtraDataSize = infoSize;
		if (description.family == B_WAV_FORMAT_FAMILY
				&& infoSize >= sizeof(wave_format_ex)) {
			TRACE(" trying to use wave_format_ex\n");
			// Special case extra data in B_WAV_FORMAT_FAMILY
			const wave_format_ex* waveFormatData
				= (const wave_format_ex*)infoBuffer;

			size_t waveFormatSize = infoSize;
			if (waveFormatData != NULL && waveFormatSize > 0) {
				fBlockAlign = waveFormatData->block_align;
				TRACE(" found block align: %d\n", fBlockAlign);
				fExtraDataSize = waveFormatData->extra_size;
				// skip the wave_format_ex from the extra data.
				extraData = waveFormatData + 1;
			}
		} else {
			if (fIsAudio) {
				fBlockAlign
					= ioEncodedFormat->u.encoded_audio.output.buffer_size;
				TRACE(" using buffer_size as block align: %d\n",
					fBlockAlign);
			}
		}
		if (extraData != NULL && fExtraDataSize > 0) {
			TRACE("AVCodecDecoder: extra data size %ld\n", infoSize);
			delete[] fExtraData;
			fExtraData = new(std::nothrow) char[fExtraDataSize];
			if (fExtraData != NULL) {
				// Copy from extraData, not infoBuffer: for
				// B_WAV_FORMAT_FAMILY streams extraData was advanced past
				// the wave_format_ex header above, so copying from
				// infoBuffer would hand the codec the header bytes instead
				// of the actual extra data.
				memcpy(fExtraData, extraData, fExtraDataSize);
			} else
				fExtraDataSize = 0;
		}

		fInputFormat = *ioEncodedFormat;
		return B_OK;
	} else {
		TRACE("AVCodecDecoder: BMediaFormats().GetCodeFor() failed.\n");
	}

	printf("AVCodecDecoder::Setup failed!\n");
	return B_ERROR;
}
/*!	\brief Resynchronizes the decoder after a seek to \a frame / \a time.
*/
status_t
AVCodecDecoder::SeekedTo(int64 frame, bigtime_t time)
{
	// After a seek the codec's internal state no longer matches the stream
	// position, so flush it to keep audio and video in sync.
	if (fCodecInitDone) {
		avcodec_flush_buffers(fCodecContext);
		_ResetTempPacket();
	}

	// Drop all buffered data as well, both still-encoded and decoded.
	free(fChunkBuffer);
	fChunkBuffer = NULL;
	fChunkBufferSize = 0;
	fDecodedDataBufferOffset = 0;
	fDecodedDataBufferSize = 0;
	fDecodedDataSizeInBytes = 0;

	fFrame = frame;

	return B_OK;
}
/*!	\brief Negotiates the decoded output format, dispatching to the audio or
	video specific implementation.
*/
status_t
AVCodecDecoder::NegotiateOutputFormat(media_format* inOutFormat)
{
	TRACE("AVCodecDecoder::NegotiateOutputFormat() [%c] \n",
		fIsAudio?('a'):('v'));

#ifdef TRACE_AV_CODEC
	char buffer[1024];
	string_for_format(*inOutFormat, buffer, sizeof(buffer));
	TRACE(" [%c] requested format = %s\n", fIsAudio?('a'):('v'), buffer);
#endif

	return fIsAudio
		? _NegotiateAudioOutputFormat(inOutFormat)
		: _NegotiateVideoOutputFormat(inOutFormat);
}
/*!	\brief Decodes the next media frame(s) into \a outBuffer, dispatching to
	the audio or video specific implementation.
*/
status_t
AVCodecDecoder::Decode(void* outBuffer, int64* outFrameCount,
	media_header* mediaHeader, media_decode_info* info)
{
	// Decoding is only possible once the codec was opened successfully.
	if (!fCodecInitDone)
		return B_NO_INIT;

	if (fIsAudio)
		return _DecodeAudio(outBuffer, outFrameCount, mediaHeader, info);

	return _DecodeVideo(outBuffer, outFrameCount, mediaHeader, info);
}
// #pragma mark -
/*!	\brief Reinitializes the scratch packet to an empty state, so the next
	decoding round starts without leftover encoded bytes.
*/
void
AVCodecDecoder::_ResetTempPacket()
{
	av_init_packet(&fTempPacket);
	fTempPacket.data = NULL;
	fTempPacket.size = 0;
}
/*!	\brief Opens the audio codec, decodes a first chunk to learn the real
	stream properties and fills \a inOutFormat with the resulting raw audio
	format. Also initializes fOutputFrameSize/Count/Rate, fInputFrameSize
	and, for planar sample formats, the swresample context.
*/
status_t
AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat)
{
	TRACE("AVCodecDecoder::_NegotiateAudioOutputFormat()\n");

	_ApplyEssentialAudioContainerPropertiesToContext();
		// This makes audio formats play that encode the audio properties in
		// the audio container (e.g. WMA) and not in the audio frames
		// themself (e.g. MP3).
		// Note: Doing this step unconditionally is OK, because the first call
		// to _DecodeNextAudioFrameChunk() will update the essential audio
		// format properties accordingly regardless of the settings here.

	// close any previous instance
	if (fCodecInitDone) {
		fCodecInitDone = false;
		avcodec_close(fCodecContext);
	}

	if (avcodec_open2(fCodecContext, fCodec, NULL) >= 0)
		fCodecInitDone = true;
	else {
		TRACE("avcodec_open() failed to init codec!\n");
		return B_ERROR;
	}

	// Start from a clean slate: discard any previously buffered data.
	free(fChunkBuffer);
	fChunkBuffer = NULL;
	fChunkBufferSize = 0;
	fAudioDecodeError = false;
	fDecodedDataBufferOffset = 0;
	fDecodedDataBufferSize = 0;

	_ResetTempPacket();

	// Decode one chunk so fCodecContext reflects the actual stream
	// properties (sample rate, channels, sample format) below.
	status_t statusOfDecodingFirstFrameChunk = _DecodeNextAudioFrameChunk();
	if (statusOfDecodingFirstFrameChunk != B_OK) {
		TRACE("[a] decoding first audio frame chunk failed\n");
		return B_ERROR;
	}

	media_multi_audio_format outputAudioFormat;
	outputAudioFormat = media_raw_audio_format::wildcard;
	outputAudioFormat.byte_order = B_MEDIA_HOST_ENDIAN;
	outputAudioFormat.frame_rate = fCodecContext->sample_rate;
	outputAudioFormat.channel_count = fCodecContext->channels;
	ConvertAVSampleFormatToRawAudioFormat(fCodecContext->sample_fmt,
		outputAudioFormat.format);
	// Check that format is not still a wild card!
	if (outputAudioFormat.format == 0) {
		TRACE(" format still a wild-card, assuming B_AUDIO_SHORT.\n");
		outputAudioFormat.format = media_raw_audio_format::B_AUDIO_SHORT;
	}

	outputAudioFormat.buffer_size = inOutFormat->u.raw_audio.buffer_size;
	// Check that buffer_size has a sane value
	// (the format's low bits encode the per-sample byte size)
	size_t sampleSize = outputAudioFormat.format
		& media_raw_audio_format::B_AUDIO_SIZE_MASK;
	if (outputAudioFormat.buffer_size == 0) {
		// No size requested by the caller: default to 512 frames worth.
		outputAudioFormat.buffer_size = 512 * sampleSize
			* outputAudioFormat.channel_count;
	}

	inOutFormat->type = B_MEDIA_RAW_AUDIO;
	inOutFormat->u.raw_audio = outputAudioFormat;
	inOutFormat->require_flags = 0;
	inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS;

	// Initialize variables needed to manage decoding as much audio frames as
	// needed to fill the buffer_size.
	fOutputFrameSize = sampleSize * outputAudioFormat.channel_count;
	fOutputFrameCount = outputAudioFormat.buffer_size / fOutputFrameSize;
	fOutputFrameRate = outputAudioFormat.frame_rate;
	if (av_sample_fmt_is_planar(fCodecContext->sample_fmt))
		fInputFrameSize = sampleSize;
			// planar: each plane holds one channel, so an input "frame"
			// within a plane is a single sample
	else
		fInputFrameSize = fOutputFrameSize;

	// The opaque field carries the avformat_codec_context snapshot for the
	// frames accumulated in fRawDecodedAudio (see _ResetRawDecodedAudio()).
	fRawDecodedAudio->opaque
		= av_realloc(fRawDecodedAudio->opaque, sizeof(avformat_codec_context));
	if (fRawDecodedAudio->opaque == NULL)
		return B_NO_MEMORY;

	// Planar decoders need a resampler to interleave/convert the samples
	// into the format announced above.
	if (av_sample_fmt_is_planar(fCodecContext->sample_fmt)) {
		fResampleContext = swr_alloc_set_opts(NULL,
			fCodecContext->channel_layout,
			fCodecContext->request_sample_fmt,
			fCodecContext->sample_rate,
			fCodecContext->channel_layout,
			fCodecContext->sample_fmt,
			fCodecContext->sample_rate,
			0, NULL);
		swr_init(fResampleContext);
	}

	TRACE(" bit_rate = %d, sample_rate = %d, channels = %d, "
		"output frame size: %d, count: %ld, rate: %.2f\n",
		fCodecContext->bit_rate, fCodecContext->sample_rate, fCodecContext->channels,
		fOutputFrameSize, fOutputFrameCount, fOutputFrameRate);

	return B_OK;
}
/*!	\brief Opens the video codec, decodes a first frame to learn the real
	stream properties and fills \a inOutFormat with the resulting raw video
	format (dimensions, color space and aspect taken from fHeader as set by
	the first decoded frame).
*/
status_t
AVCodecDecoder::_NegotiateVideoOutputFormat(media_format* inOutFormat)
{
	TRACE("AVCodecDecoder::_NegotiateVideoOutputFormat()\n");

	TRACE(" requested video format 0x%x\n",
		inOutFormat->u.raw_video.display.format);

	_ApplyEssentialVideoContainerPropertiesToContext();
		// This makes video formats play that encode the video properties in
		// the video container (e.g. WMV) and not in the video frames
		// themself (e.g. MPEG2).
		// Note: Doing this step unconditionally is OK, because the first call
		// to _DecodeNextVideoFrame() will update the essential video format
		// properties accordingly regardless of the settings here.

	bool codecCanHandleIncompleteFrames
		= (fCodec->capabilities & AV_CODEC_CAP_TRUNCATED) != 0;
	if (codecCanHandleIncompleteFrames) {
		// Expect and handle video frames to be splitted across consecutive
		// data chunks.
		fCodecContext->flags |= AV_CODEC_FLAG_TRUNCATED;
	}

	// close any previous instance
	if (fCodecInitDone) {
		fCodecInitDone = false;
		avcodec_close(fCodecContext);
	}

	if (avcodec_open2(fCodecContext, fCodec, NULL) >= 0)
		fCodecInitDone = true;
	else {
		TRACE("avcodec_open() failed to init codec!\n");
		return B_ERROR;
	}

#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
	fOutputColorSpace = B_RGB32;
#else
	// Make MediaPlayer happy (if not in rgb32 screen depth and no overlay,
	// it will only ask for YCbCr, which DrawBitmap doesn't handle, so the
	// default colordepth is RGB32).
	if (inOutFormat->u.raw_video.display.format == B_YCbCr422)
		fOutputColorSpace = B_YCbCr422;
	else
		fOutputColorSpace = B_RGB32;
#endif

	// Reset any color conversion state from a previous negotiation; it is
	// recreated by the first _DecodeNextVideoFrame() below.
#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
	if (fSwsContext != NULL)
		sws_freeContext(fSwsContext);
	fSwsContext = NULL;
#else
	fFormatConversionFunc = 0;
#endif

	free(fChunkBuffer);
	fChunkBuffer = NULL;
	fChunkBufferSize = 0;

	_ResetTempPacket();

	// Decode one frame so fHeader below describes the actual video
	// (dimensions, bytes per row, aspect, ...).
	status_t statusOfDecodingFirstFrame = _DecodeNextVideoFrame();
	if (statusOfDecodingFirstFrame != B_OK) {
		TRACE("[v] decoding first video frame failed\n");
		return B_ERROR;
	}

	// Note: fSwsContext / fFormatConversionFunc should have been initialized
	// by first call to _DecodeNextVideoFrame() above.
#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
	if (fSwsContext == NULL) {
		TRACE("No SWS Scale context or decoder has not set the pixel format "
			"yet!\n");
	}
#else
	if (fFormatConversionFunc == NULL) {
		TRACE("no pixel format conversion function found or decoder has "
			"not set the pixel format yet!\n");
	}
#endif

	inOutFormat->type = B_MEDIA_RAW_VIDEO;
	inOutFormat->require_flags = 0;
	inOutFormat->deny_flags = B_MEDIA_MAUI_UNDEFINED_FLAGS;

	inOutFormat->u.raw_video = fInputFormat.u.encoded_video.output;

	inOutFormat->u.raw_video.interlace = 1;
		// Progressive (non-interlaced) video frames are delivered

	inOutFormat->u.raw_video.first_active
		= fHeader.u.raw_video.first_active_line;
	inOutFormat->u.raw_video.last_active = fHeader.u.raw_video.line_count;
	inOutFormat->u.raw_video.pixel_width_aspect
		= fHeader.u.raw_video.pixel_width_aspect;
	inOutFormat->u.raw_video.pixel_height_aspect
		= fHeader.u.raw_video.pixel_height_aspect;
#if 0
	// This was added by Colin Günther in order to handle streams with a
	// variable frame rate. fOutputFrameRate is computed from the stream
	// time_base, but it actually assumes a timebase equal to the FPS. As far
	// as I can see, a stream with a variable frame rate would have a higher
	// resolution time_base and increment the pts (presentation time) of each
	// frame by a value bigger than one.
	//
	// Fixed rate stream:
	// time_base = 1/50s, frame PTS = 1, 2, 3... (for 50Hz)
	//
	// Variable rate stream:
	// time_base = 1/300s, frame PTS = 6, 12, 18, ... (for 50Hz)
	// time_base = 1/300s, frame PTS = 5, 10, 15, ... (for 60Hz)
	//
	// The fOutputFrameRate currently does not take this into account and
	// ignores the PTS. This results in playing the above sample at 300Hz
	// instead of 50 or 60.
	//
	// However, comparing the PTS for two consecutive implies we have already
	// decoded 2 frames, which may not be the case when this method is first
	// called.
	inOutFormat->u.raw_video.field_rate = fOutputFrameRate;
		// Was calculated by first call to _DecodeNextVideoFrame()
#endif
	inOutFormat->u.raw_video.display.format = fOutputColorSpace;
	inOutFormat->u.raw_video.display.line_width
		= fHeader.u.raw_video.display_line_width;
	inOutFormat->u.raw_video.display.line_count
		= fHeader.u.raw_video.display_line_count;
	inOutFormat->u.raw_video.display.bytes_per_row
		= fHeader.u.raw_video.bytes_per_row;

#ifdef TRACE_AV_CODEC
	char buffer[1024];
	string_for_format(*inOutFormat, buffer, sizeof(buffer));
	TRACE("[v] outFormat = %s\n", buffer);
	TRACE(" returned video format 0x%x\n",
		inOutFormat->u.raw_video.display.format);
#endif

	return B_OK;
}
/*! \brief Fills the outBuffer with one or more already decoded audio frames.
Besides the main duty described above, this method also fills out the other
output parameters as documented below.
\param outBuffer Pointer to the output buffer to copy the decoded audio
frames to.
\param outFrameCount Pointer to the output variable to assign the number of
copied audio frames (usually several audio frames at once).
\param mediaHeader Pointer to the output media header that contains the
properties of the decoded audio frame being the first in the outBuffer.
\param info Specifies additional decoding parameters. (Note: unused).
\returns B_OK Decoding audio frames succeeded.
\returns B_LAST_BUFFER_ERROR There are no more audio frames available.
\returns Other error codes
*/
status_t
AVCodecDecoder::_DecodeAudio(void* outBuffer, int64* outFrameCount,
	media_header* mediaHeader, media_decode_info* info)
{
	TRACE_AUDIO("AVCodecDecoder::_DecodeAudio(audio start_time %.6fs)\n",
		mediaHeader->start_time / 1000000.0);

	// Only decode more frames if the previous batch was fully consumed.
	if (fDecodedDataSizeInBytes <= 0) {
		status_t decodeStatus = _DecodeNextAudioFrame();
		if (decodeStatus != B_OK)
			return decodeStatus;
	}

	// Hand the buffered frames over to the caller.
	*outFrameCount = fDecodedDataSizeInBytes / fOutputFrameSize;
	*mediaHeader = fHeader;
	memcpy(outBuffer, fDecodedData, fDecodedDataSizeInBytes);

	// Mark the decoded data as consumed.
	fDecodedDataSizeInBytes = 0;

	return B_OK;
}
/*! \brief Fills the outBuffer with an already decoded video frame.
Besides the main duty described above, this method also fills out the other
output parameters as documented below.
\param outBuffer Pointer to the output buffer to copy the decoded video
frame to.
\param outFrameCount Pointer to the output variable to assign the number of
copied video frames (usually one video frame).
\param mediaHeader Pointer to the output media header that contains the
decoded video frame properties.
\param info Specifies additional decoding parameters. (Note: unused).
\returns B_OK Decoding a video frame succeeded.
\returns B_LAST_BUFFER_ERROR There are no more video frames available.
\returns Other error codes
*/
status_t
AVCodecDecoder::_DecodeVideo(void* outBuffer, int64* outFrameCount,
	media_header* mediaHeader, media_decode_info* info)
{
	// Only decode the next frame if no already decoded one is pending.
	if (fDecodedDataSizeInBytes <= 0) {
		status_t decodeStatus = _DecodeNextVideoFrame();
		if (decodeStatus != B_OK)
			return decodeStatus;
	}

	// Hand exactly one video frame over to the caller.
	*outFrameCount = 1;
	*mediaHeader = fHeader;
	memcpy(outBuffer, fDecodedData, mediaHeader->size_used);

	// Mark the decoded frame as consumed.
	fDecodedDataSizeInBytes = 0;

	return B_OK;
}
/*! \brief Decodes next audio frame.
We decode at least one audio frame into fDecodedData. To achieve this goal,
we might need to request several chunks of encoded data resulting in a
variable execution time of this function.
The length of the decoded audio frame(s) is stored in
fDecodedDataSizeInBytes. If this variable is greater than zero you can
assert that all audio frames in fDecodedData are valid.
It is assumed that the number of expected audio frames is stored in
fOutputFrameCount. So _DecodeNextAudioFrame() must be called only after
fOutputFrameCount has been set.
Note: fOutputFrameCount contains the maximum number of frames a caller
of BMediaDecoder::Decode() expects to receive. There is a direct
relationship between fOutputFrameCount and the buffer size a caller of
BMediaDecoder::Decode() will provide so we make sure to respect this limit
for fDecodedDataSizeInBytes.
On return with status code B_OK the following conditions hold true:
1. fDecodedData contains as much audio frames as the caller of
BMediaDecoder::Decode() expects.
2. fDecodedData contains lesser audio frames as the caller of
BMediaDecoder::Decode() expects only when one of the following
conditions hold true:
i No more audio frames left. Consecutive calls to
_DecodeNextAudioFrame() will then result in the return of
status code B_LAST_BUFFER_ERROR.
ii TODO: A change in the size of the audio frames.
3. fHeader is populated with the audio frame properties of the first
audio frame in fDecodedData. Especially the start_time field of
fHeader relates to that first audio frame. Start times of
consecutive audio frames in fDecodedData have to be calculated
manually (using the frame rate and the frame duration) if the
caller needs them.
TODO: Handle change of channel_count. Such a change results in a change of
the audio frame size and thus has different buffer requirements.
The most sane approach for implementing this is to return the audio frames
that were still decoded with the previous channel_count and inform the
client of BMediaDecoder::Decode() about the change so that it can adapt to
it. Furthermore we need to adapt our fDecodedData to the new buffer size
requirements accordingly.
\returns B_OK when we successfully decoded enough audio frames
\returns B_LAST_BUFFER_ERROR when there are no more audio frames available.
\returns Other Errors
*/
status_t
AVCodecDecoder::_DecodeNextAudioFrame()
{
	assert(fTempPacket.size >= 0);
	assert(fDecodedDataSizeInBytes == 0);
		// _DecodeNextAudioFrame needs to be called on empty fDecodedData only!
		// If this assert holds wrong we have a bug somewhere.

	// Rewind fRawDecodedAudio to the start of fDecodedData (allocating the
	// buffer on the first call).
	status_t resetStatus = _ResetRawDecodedAudio();
	if (resetStatus != B_OK)
		return resetStatus;

	// Accumulate frames until a full client buffer (fOutputFrameCount
	// frames) is collected.
	while (fRawDecodedAudio->nb_samples < fOutputFrameCount) {
		_CheckAndFixConditionsThatHintAtBrokenAudioCodeBelow();

		// First drain frames that were already decoded into
		// fDecodedDataBuffer by a previous chunk...
		bool decodedDataBufferHasData = fDecodedDataBufferSize > 0;
		if (decodedDataBufferHasData) {
			_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes();
			continue;
		}

		// ...then decode the next chunk of encoded data.
		status_t decodeAudioChunkStatus = _DecodeNextAudioFrameChunk();
		if (decodeAudioChunkStatus == B_LAST_BUFFER_ERROR
			&& fRawDecodedAudio->nb_samples > 0)
			break;
			// On end of stream return the partially filled buffer instead
			// of dropping the frames that were already decoded.
		if (decodeAudioChunkStatus != B_OK)
			return decodeAudioChunkStatus;
	}

	fFrame += fRawDecodedAudio->nb_samples;
	fDecodedDataSizeInBytes = fRawDecodedAudio->linesize[0];
		// linesize[0] tracks the total byte size of the accumulated frames

	_UpdateMediaHeaderForAudioFrame();

#ifdef DEBUG
	dump_ffframe_audio(fRawDecodedAudio, "ffaudi");
#endif

	TRACE_AUDIO(" frame count: %ld current: %lld\n",
		fRawDecodedAudio->nb_samples, fFrame);

	return B_OK;
}
/*! \brief Applies all essential audio input properties to fCodecContext that were
passed to AVCodecDecoder when Setup() was called.
Note: This function must be called before the AVCodec is opened via
avcodec_open2(). Otherwise the behaviour of FFMPEG's audio decoding
function avcodec_receive_frame() is undefined.
Essential properties applied from fInputFormat.u.encoded_audio:
- bit_rate copied to fCodecContext->bit_rate
- frame_size copied to fCodecContext->frame_size
- output.format converted to fCodecContext->sample_fmt
- output.frame_rate copied to fCodecContext->sample_rate
- output.channel_count copied to fCodecContext->channels
Other essential properties being applied:
- fBlockAlign to fCodecContext->block_align
- fExtraData to fCodecContext->extradata
- fExtraDataSize to fCodecContext->extradata_size
TODO: Either the following documentation section should be removed or this
TODO when it is clear whether fInputFormat.MetaData() and
fInputFormat.MetaDataSize() have to be applied to fCodecContext. See the related
TODO in the method implementation.
Only applied when fInputFormat.MetaDataSize() is greater than zero:
- fInputFormat.MetaData() to fCodecContext->extradata
- fInputFormat.MetaDataSize() to fCodecContext->extradata_size
*/
void
AVCodecDecoder::_ApplyEssentialAudioContainerPropertiesToContext()
{
	media_encoded_audio_format containerProperties
		= fInputFormat.u.encoded_audio;

	fCodecContext->bit_rate
		= static_cast<int>(containerProperties.bit_rate);
	fCodecContext->frame_size
		= static_cast<int>(containerProperties.frame_size);
	// Apply the container's raw audio format both as the actual and the
	// requested sample format (the latter is used as the swresample output
	// format in _NegotiateAudioOutputFormat()).
	ConvertRawAudioFormatToAVSampleFormat(
		containerProperties.output.format, fCodecContext->sample_fmt);
	ConvertRawAudioFormatToAVSampleFormat(
		containerProperties.output.format, fCodecContext->request_sample_fmt);
	fCodecContext->sample_rate
		= static_cast<int>(containerProperties.output.frame_rate);
	fCodecContext->channels
		= static_cast<int>(containerProperties.output.channel_count);
	// Check that channel count is not still a wild card!
	if (fCodecContext->channels == 0) {
		TRACE(" channel_count still a wild-card, assuming stereo.\n");
		fCodecContext->channels = 2;
	}

	// fBlockAlign and fExtraData were prepared in Setup().
	fCodecContext->block_align = fBlockAlign;
	fCodecContext->extradata = reinterpret_cast<uint8_t*>(fExtraData);
	fCodecContext->extradata_size = fExtraDataSize;

	// TODO: This probably needs to go away, there is some misconception
	// about extra data / info buffer and meta data. See
	// Reader::GetStreamInfo(). The AVFormatReader puts extradata and
	// extradata_size into media_format::MetaData(), but used to ignore
	// the infoBuffer passed to GetStreamInfo(). I think this may be why
	// the code below was added.
	if (fInputFormat.MetaDataSize() > 0) {
		fCodecContext->extradata = static_cast<uint8_t*>(
			const_cast<void*>(fInputFormat.MetaData()));
		fCodecContext->extradata_size = fInputFormat.MetaDataSize();
	}

	TRACE(" bit_rate %d, sample_rate %d, channels %d, block_align %d, "
		"extradata_size %d\n",
		fCodecContext->bit_rate,
		fCodecContext->sample_rate,
		fCodecContext->channels,
		fCodecContext->block_align,
		fCodecContext->extradata_size);
}
/*! \brief Resets important fields in fRawDecodedVideo to their default values.
Note: Also initializes fDecodedData if not done already.
\returns B_OK Resetting successfully completed.
\returns B_NO_MEMORY No memory left for correct operation.
*/
status_t
AVCodecDecoder::_ResetRawDecodedAudio()
{
	// Lazily allocate the output buffer, sized to hold one full client
	// buffer worth of decoded audio frames.
	if (fDecodedData == NULL) {
		size_t decodedDataCapacity = fOutputFrameCount * fOutputFrameSize;
		fDecodedData
			= static_cast<uint8_t*>(malloc(decodedDataCapacity));
		if (fDecodedData == NULL)
			return B_NO_MEMORY;
	}

	// Rewind the accumulator frame to the start of fDecodedData and clear
	// all bookkeeping fields.
	fRawDecodedAudio->data[0] = fDecodedData;
	fRawDecodedAudio->linesize[0] = 0;
	fRawDecodedAudio->format = AV_SAMPLE_FMT_NONE;
	fRawDecodedAudio->pkt_dts = AV_NOPTS_VALUE;
	fRawDecodedAudio->nb_samples = 0;
	memset(fRawDecodedAudio->opaque, 0, sizeof(avformat_codec_context));

	return B_OK;
}
/*! \brief Checks fDecodedDataBufferSize and fTempPacket for invalid values,
reports them and assigns valid values.
Note: This method is intended to be called before any code is executed that
deals with moving, loading or decoding any audio frames.
*/
void
AVCodecDecoder::_CheckAndFixConditionsThatHintAtBrokenAudioCodeBelow()
{
	// A negative size means some code consumed more decoded bytes than were
	// available; report it and clamp back to a sane value.
	if (fDecodedDataBufferSize < 0) {
		fprintf(stderr, "Decoding read past the end of the decoded data "
			"buffer! %" B_PRId32 "\n", fDecodedDataBufferSize);
		fDecodedDataBufferSize = 0;
	}
	// Likewise for the temp packet holding still-encoded data.
	if (fTempPacket.size < 0) {
		fprintf(stderr, "Decoding read past the end of the temp packet! %d\n",
			fTempPacket.size);
		fTempPacket.size = 0;
	}
}
/*! \brief Moves audio frames from fDecodedDataBuffer to fRawDecodedAudio (and
thus to fDecodedData) and updates the start times of fRawDecodedAudio,
fDecodedDataBuffer and fTempPacket accordingly.
When moving audio frames to fRawDecodedAudio this method also makes sure
that the following important fields of fRawDecodedAudio are populated and
updated with correct values:
- fRawDecodedAudio->data[0]: Points to first free byte of fDecodedData
- fRawDecodedAudio->linesize[0]: Total size of frames in fDecodedData
- fRawDecodedAudio->format: Format of first audio frame
- fRawDecodedAudio->pkt_dts: Start time of first audio frame
- fRawDecodedAudio->nb_samples: Number of audio frames
- fRawDecodedAudio->opaque: Contains the following fields for the first
audio frame:
- channels: Channel count of first audio frame
- sample_rate: Frame rate of first audio frame
This function assumes to be called only when the following assumptions
hold true:
1. There are decoded audio frames available in fDecodedDataBuffer
meaning that fDecodedDataBufferSize is greater than zero.
2. There is space left in fRawDecodedAudio to move some audio frames
in. This means that fRawDecodedAudio has lesser audio frames than
the maximum allowed (specified by fOutputFrameCount).
3. The audio frame rate is known so that we can calculate the time
range (covered by the moved audio frames) to update the start times
accordingly.
4. The field fRawDecodedAudio->opaque points to a memory block
representing a structure of type avformat_codec_context.
After this function returns the caller can safely make the following
assumptions:
1. The number of decoded audio frames in fDecodedDataBuffer is
decreased though it may still be greater then zero.
2. The number of frames in fRawDecodedAudio has increased and all
important fields are updated (see listing above).
3. Start times of fDecodedDataBuffer and fTempPacket were increased
with the time range covered by the moved audio frames.
Note: This function raises an exception (by calling the debugger), when
fDecodedDataBufferSize is not a multiple of fOutputFrameSize.
*/
void
AVCodecDecoder::_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes()
{
assert(fDecodedDataBufferSize > 0);
assert(fRawDecodedAudio->nb_samples < fOutputFrameCount);
assert(fOutputFrameRate > 0);
int32 outFrames = fOutputFrameCount - fRawDecodedAudio->nb_samples;
int32 inFrames = fDecodedDataBufferSize;
int32 frames = min_c(outFrames, inFrames);
if (frames == 0)
debugger("fDecodedDataBufferSize not multiple of frame size!");
// Some decoders do not support format conversion on themselves, or use
// "planar" audio (each channel separated instead of interleaved samples).
// In that case, we use swresample to convert the data
if (av_sample_fmt_is_planar(fCodecContext->sample_fmt)) {
#if 0
const uint8_t* ptr[8];
for (int i = 0; i < 8; i++) {
if (fDecodedDataBuffer->data[i] == NULL)
ptr[i] = NULL;
else
ptr[i] = fDecodedDataBuffer->data[i] + fDecodedDataBufferOffset;
}
// When there are more input frames than space in the output buffer,
// we could feed everything to swr and it would buffer the extra data.
// However, there is no easy way to flush that data without feeding more
// input, and it makes our timestamp computations fail.
// So, we feed only as much frames as we can get out, and handle the
// buffering ourselves.
// TODO Ideally, we should try to size our output buffer so that it can
// always hold all the output (swr provides helper functions for this)
inFrames = frames;
frames = swr_convert(fResampleContext, fRawDecodedAudio->data,
outFrames, ptr, inFrames);
if (frames < 0)
debugger("resampling failed");
#else
// interleave planar audio with same format
uintptr_t out = (uintptr_t)fRawDecodedAudio->data[0];
int32 offset = fDecodedDataBufferOffset;
for (int i = 0; i < frames; i++) {
for (int j = 0; j < fCodecContext->channels; j++) {
memcpy((void*)out, fDecodedDataBuffer->data[j]
+ offset, fInputFrameSize);
out += fInputFrameSize;
}
offset += fInputFrameSize;
}
outFrames = frames;
inFrames = frames;
#endif
} else {
memcpy(fRawDecodedAudio->data[0], fDecodedDataBuffer->data[0]
+ fDecodedDataBufferOffset, frames * fOutputFrameSize);
outFrames = frames;
inFrames = frames;
}
size_t remainingSize = inFrames * fInputFrameSize;
size_t decodedSize = outFrames * fOutputFrameSize;
fDecodedDataBufferSize -= inFrames;
bool firstAudioFramesCopiedToRawDecodedAudio
= fRawDecodedAudio->data[0] != fDecodedData;
if (!firstAudioFramesCopiedToRawDecodedAudio) {
fRawDecodedAudio->format = fDecodedDataBuffer->format;
fRawDecodedAudio->pkt_dts = fDecodedDataBuffer->pkt_dts;
avformat_codec_context* codecContext
= static_cast<avformat_codec_context*>(fRawDecodedAudio->opaque);
codecContext->channels = fCodecContext->channels;