Skip to content

Commit

Permalink
AVCodecDecoder: use swresample to interleave audio channels.
Browse files Browse the repository at this point in the history
Some codecs always output audio in planar format, no matter which sample
format we request. This is the case, for example, with the AAC audio used
by YouTube. We now use swresample to convert from planar to packed format.

Note that since swresample does its own buffering, we could probably do
away with some of the code that handled buffering before, making the
audio pipeline simpler and faster.

This fixes audio on YouTube, but the video now plays at 2x speed. It seems
something is wrong with the timestamps. Possible things to investigate:
* Why do we use the packet dts instead of the pts from the frames anyway?
* The pts and pkt_dts are in "stream time_base units". We seem to assume
  microseconds for audio, but this is probably not the case. Or did I
  miss where the conversion is done?
  • Loading branch information
pulkomandy committed Nov 19, 2015
1 parent 235725e commit 856cc59
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 41 deletions.
65 changes: 26 additions & 39 deletions src/add-ons/media/plugins/ffmpeg/AVCodecDecoder.cpp
Expand Up @@ -5,12 +5,14 @@
* Copyright (C) 2004 Marcus Overhagen
* Copyright (C) 2009 Stephan Aßmus <superstippi@gmx.de>
* Copyright (C) 2014 Colin Günther <coling@gmx.de>
* Copyright (C) 2015 Adrien Destugues <pulkomandy@pulkomandy.tk>
*
* All rights reserved. Distributed under the terms of the MIT License.
*/

//! libavcodec based decoder for Haiku


#include "AVCodecDecoder.h"

#include <new>
Expand Down Expand Up @@ -95,6 +97,7 @@ AVCodecDecoder::AVCodecDecoder()
fIsAudio(false),
fCodec(NULL),
fContext(avcodec_alloc_context3(NULL)),
fResampleContext(NULL),
fDecodedData(NULL),
fDecodedDataSizeInBytes(0),
fPostProcessedDecodedPicture(avcodec_alloc_frame()),
Expand Down Expand Up @@ -123,7 +126,6 @@ AVCodecDecoder::AVCodecDecoder()
fAudioDecodeError(false),

fDecodedDataBuffer(avcodec_alloc_frame()),
fDecodedDataBufferOffset(0),
fDecodedDataBufferSize(0)
{
TRACE("AVCodecDecoder::AVCodecDecoder()\n");
Expand Down Expand Up @@ -152,6 +154,7 @@ AVCodecDecoder::~AVCodecDecoder()
if (fCodecInitDone)
avcodec_close(fContext);

swr_free(&fResampleContext);
free(fChunkBuffer);
free(fDecodedData);

Expand Down Expand Up @@ -237,8 +240,7 @@ AVCodecDecoder::Setup(media_format* ioEncodedFormat, const void* infoBuffer,
} else {
if (fIsAudio) {
fBlockAlign
= ioEncodedFormat->u.encoded_audio.output
.buffer_size;
= ioEncodedFormat->u.encoded_audio.output.buffer_size;
TRACE(" using buffer_size as block align: %d\n",
fBlockAlign);
}
Expand Down Expand Up @@ -278,7 +280,6 @@ AVCodecDecoder::SeekedTo(int64 frame, bigtime_t time)
free(fChunkBuffer);
fChunkBuffer = NULL;
fChunkBufferSize = 0;
fDecodedDataBufferOffset = 0;
fDecodedDataBufferSize = 0;
fDecodedDataSizeInBytes = 0;

Expand Down Expand Up @@ -366,7 +367,6 @@ AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat)
fChunkBuffer = NULL;
fChunkBufferSize = 0;
fAudioDecodeError = false;
fDecodedDataBufferOffset = 0;
fDecodedDataBufferSize = 0;

_ResetTempPacket();
Expand Down Expand Up @@ -413,6 +413,13 @@ AVCodecDecoder::_NegotiateAudioOutputFormat(media_format* inOutFormat)
if (fRawDecodedAudio->opaque == NULL)
return B_NO_MEMORY;

fResampleContext = swr_alloc_set_opts(NULL,
fContext->channel_layout, fContext->request_sample_fmt,
fContext->sample_rate,
fContext->channel_layout, fContext->sample_fmt, fContext->sample_rate,
0, NULL);
swr_init(fResampleContext);

TRACE(" bit_rate = %d, sample_rate = %d, channels = %d, "
"output frame size: %d, count: %ld, rate: %.2f\n",
fContext->bit_rate, fContext->sample_rate, fContext->channels,
Expand Down Expand Up @@ -902,24 +909,20 @@ AVCodecDecoder::_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes()
assert(fRawDecodedAudio->nb_samples < fOutputFrameCount);
assert(fOutputFrameRate > 0);

int32 frames = min_c(fOutputFrameCount - fRawDecodedAudio->nb_samples,
fDecodedDataBufferSize / fOutputFrameSize);
if (frames == 0)
debugger("fDecodedDataBufferSize not multiple of frame size!");

size_t remainingSize = frames * fOutputFrameSize;

#if 0
// Some decoders do not support format conversion by themselves, or use
// "planar" audio (each channel separated instead of interleaved samples).
// If this is a problem we will need to use swresample to convert the data
// here, instead of directly copying it.
swr_convert(fResampleContext, fRawDecodedAudio->data,
fDecodedDataBuffer->data + fDecodedDataBufferOffset, frames);
#else
memcpy(fRawDecodedAudio->data[0], fDecodedDataBuffer->data[0]
+ fDecodedDataBufferOffset, remainingSize);
#endif
// In that case, we use swresample to convert the data (and it is
// smart enough to do just a copy, when possible)
int32 frames = swr_convert(fResampleContext, fRawDecodedAudio->data,
fOutputFrameCount - fRawDecodedAudio->nb_samples,
(const uint8_t**)fDecodedDataBuffer->data,
fDecodedDataBuffer->nb_samples);
if (frames < 0)
debugger("resampling failed");
size_t remainingSize = frames * fOutputFrameSize;

// libswresample handles all the buffering for us, how nice of them!
fDecodedDataBufferSize = 0;

bool firstAudioFramesCopiedToRawDecodedAudio
= fRawDecodedAudio->data[0] != fDecodedData;
Expand All @@ -936,19 +939,6 @@ AVCodecDecoder::_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes()
fRawDecodedAudio->data[0] += remainingSize;
fRawDecodedAudio->linesize[0] += remainingSize;
fRawDecodedAudio->nb_samples += frames;

fDecodedDataBufferOffset += remainingSize;
fDecodedDataBufferSize -= remainingSize;

// Update start times accordingly
bigtime_t framesTimeInterval = static_cast<bigtime_t>(
(1000000LL * frames) / fOutputFrameRate);
fDecodedDataBuffer->pkt_dts += framesTimeInterval;
// Start time of buffer is updated in case that it contains
// more audio frames to move.
fTempPacket.dts += framesTimeInterval;
// Start time of fTempPacket is updated in case the fTempPacket
// contains more audio frames to decode.
}


Expand All @@ -970,8 +960,7 @@ AVCodecDecoder::_MoveAudioFramesToRawDecodedAudioAndUpdateStartTimes()
After this function returns successfully the caller can safely make the
following assumptions:
1. fDecodedDataBufferSize is greater than zero.
2. fDecodedDataBufferOffset is set to zero.
3. fDecodedDataBuffer contains audio frames.
2. fDecodedDataBuffer contains audio frames.
\returns B_OK on successfully decoding one audio frame chunk.
\returns B_LAST_BUFFER_ERROR No more audio frame chunks available. From
Expand All @@ -983,7 +972,7 @@ AVCodecDecoder::_DecodeNextAudioFrameChunk()
{
assert(fDecodedDataBufferSize == 0);

while(fDecodedDataBufferSize == 0) {
while (fDecodedDataBufferSize == 0) {
status_t loadingChunkStatus
= _LoadNextChunkIfNeededAndAssignStartTime();
if (loadingChunkStatus != B_OK)
Expand Down Expand Up @@ -1030,7 +1019,6 @@ AVCodecDecoder::_DecodeNextAudioFrameChunk()
Also see "Note" below.
2. fTempPacket was updated to exclude the data chunk that was consumed
by avcodec_decode_audio4().
3. fDecodedDataBufferOffset is set to zero.
When this function failed to decode at least one audio frame due to a
decoding error the caller can safely make the following assumptions:
Expand All @@ -1057,7 +1045,6 @@ AVCodecDecoder::_DecodeSomeAudioFramesIntoEmptyDecodedDataBuffer()
assert(fTempPacket.size > 0);

avcodec_get_frame_defaults(fDecodedDataBuffer);
fDecodedDataBufferOffset = 0;
int gotAudioFrame = 0;

int encodedDataSizeInBytes = avcodec_decode_audio4(fContext,
Expand Down
10 changes: 8 additions & 2 deletions src/add-ons/media/plugins/ffmpeg/AVCodecDecoder.h
Expand Up @@ -4,6 +4,7 @@
* Copyright (C) 2001 Axel Dörfler.
* Copyright (C) 2004 Marcus Overhagen.
* Copyright (C) 2009 Stephan Aßmus <superstippi@gmx.de>.
* Copyright (C) 2015 Adrien Destugues <pulkomandy@pulkomandy.tk>.
*
* All rights reserved. Distributed under the terms of the MIT License.
*/
Expand All @@ -12,13 +13,16 @@

//! libavcodec based decoder for Haiku


#include <MediaFormats.h>


extern "C" {
#include "avcodec.h"
#include "swscale.h"
#include "swresample.h"
}


#include "DecoderPlugin.h"
#include "ReaderPlugin.h"

Expand Down Expand Up @@ -93,6 +97,7 @@ class AVCodecDecoder : public Decoder {
// FFmpeg related members
AVCodec* fCodec;
AVCodecContext* fContext;
SwrContext* fResampleContext;
uint8_t* fDecodedData;
size_t fDecodedDataSizeInBytes;
AVFrame* fPostProcessedDecodedPicture;
Expand All @@ -102,7 +107,9 @@ class AVCodecDecoder : public Decoder {
bool fCodecInitDone;

gfx_convert_func fFormatConversionFunc;
#if USE_SWS_FOR_COLOR_SPACE_CONVERSION
SwsContext* fSwsContext;
#endif

char* fExtraData;
int fExtraDataSize;
Expand All @@ -119,7 +126,6 @@ class AVCodecDecoder : public Decoder {
bool fAudioDecodeError;

AVFrame* fDecodedDataBuffer;
int32 fDecodedDataBufferOffset;
int32 fDecodedDataBufferSize;

AVPacket fTempPacket;
Expand Down
1 change: 1 addition & 0 deletions src/add-ons/media/plugins/ffmpeg/Jamfile
Expand Up @@ -47,6 +47,7 @@ for architectureObject in [ MultiArchSubDirSetup ] {
UseHeaders [ FDirName $(ffmpegHeaders) libavformat ] ;
UseHeaders [ FDirName $(ffmpegHeaders) libavutil ] ;
UseHeaders [ FDirName $(ffmpegHeaders) libswscale ] ;
UseHeaders [ FDirName $(ffmpegHeaders) libswresample ] ;

Addon [ MultiArchDefaultGristFiles ffmpeg ] :
$(sources)
Expand Down

0 comments on commit 856cc59

Please sign in to comment.