Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions c_src/xav/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ struct Decoder *decoder_alloc() {

decoder->codec = NULL;
decoder->c = NULL;
decoder->out_format = AV_PIX_FMT_NONE;

return decoder;
}

int decoder_init(struct Decoder *decoder, const char *codec) {
int decoder_init(struct Decoder *decoder, const char *codec, const char* out_format) {
if (strcmp(codec, "opus") == 0) {
decoder->media_type = AVMEDIA_TYPE_AUDIO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_OPUS);
Expand All @@ -25,7 +26,7 @@ int decoder_init(struct Decoder *decoder, const char *codec) {
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_H264);
} else if (strcmp(codec, "h265") == 0) {
decoder->media_type = AVMEDIA_TYPE_VIDEO;
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_H265);
decoder->codec = avcodec_find_decoder(AV_CODEC_ID_HEVC);
} else {
return -1;
}
Expand All @@ -34,6 +35,13 @@ int decoder_init(struct Decoder *decoder, const char *codec) {
return -1;
}

if(decoder->media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
decoder->out_format = av_get_pix_fmt(out_format);
if (decoder->out_format == AV_PIX_FMT_NONE) {
return -1;
}
}

decoder->c = avcodec_alloc_context3(decoder->codec);
if (!decoder->c) {
return -1;
Expand Down
3 changes: 2 additions & 1 deletion c_src/xav/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

struct Decoder {
enum AVMediaType media_type;
enum AVPixelFormat out_format;
AVFrame *frame;
AVPacket *pkt;
const AVCodec *codec;
Expand All @@ -16,7 +17,7 @@ struct Decoder {

struct Decoder *decoder_alloc();

int decoder_init(struct Decoder *decoder, const char *codec);
int decoder_init(struct Decoder *decoder, const char *codec, const char* out_format);

int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame);

Expand Down
14 changes: 9 additions & 5 deletions c_src/xav/utils.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "utils.h"
#include <libavutil/mathematics.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <stdint.h>

Expand Down Expand Up @@ -33,13 +34,16 @@ ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int
return enif_make_tuple(env, 4, data_term, format_term, samples_term, pts_term);
}

ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame, uint8_t *data[4],
int linesize[4], const char *format_name) {
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame) {
ERL_NIF_TERM data_term;
unsigned char *ptr = enif_make_new_binary(env, linesize[0] * frame->height, &data_term);
memcpy(ptr, data[0], linesize[0] * frame->height);

ERL_NIF_TERM format_term = enif_make_atom(env, format_name);
int payload_size = av_image_get_buffer_size(frame->format, frame->width, frame->height, 1);
unsigned char *ptr = enif_make_new_binary(env, payload_size, &data_term);

av_image_copy_to_buffer(ptr, payload_size, (const uint8_t *const *)frame->data,
(const int*)frame->linesize, frame->format, frame->width, frame->height, 1);

ERL_NIF_TERM format_term = enif_make_atom(env, av_get_pix_fmt_name(frame->format));
ERL_NIF_TERM height_term = enif_make_int(env, frame->height);
ERL_NIF_TERM width_term = enif_make_int(env, frame->width);
ERL_NIF_TERM pts_term = enif_make_int64(env, frame->pts);
Expand Down
3 changes: 1 addition & 2 deletions c_src/xav/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
ERL_NIF_TERM xav_nif_ok(ErlNifEnv *env, ERL_NIF_TERM data_term);
ERL_NIF_TERM xav_nif_error(ErlNifEnv *env, char *reason);
ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg);
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame, uint8_t *out_data[4],
int out_linesize[4], const char *out_format);
ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame);
ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples,
int out_size, const char *out_format, int pts);
29 changes: 21 additions & 8 deletions c_src/xav/video_converter.c
Original file line number Diff line number Diff line change
@@ -1,30 +1,43 @@
#include "video_converter.h"

int video_converter_convert(AVFrame *src_frame, uint8_t *out_data[], int out_linesize[]) {
int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame, enum AVPixelFormat out_format) {
int ret;

*dst_frame = av_frame_alloc();
if (!*dst_frame) {
return -1;
}

(*dst_frame)->width = src_frame->width;
(*dst_frame)->height = src_frame->height;
(*dst_frame)->format = out_format;
(*dst_frame)->pts = src_frame->pts;

ret = av_frame_get_buffer(*dst_frame, 0);
if (ret < 0) {
return ret;
}

struct SwsContext *sws_ctx =
sws_getContext(src_frame->width, src_frame->height, src_frame->format, src_frame->width,
src_frame->height, AV_PIX_FMT_RGB24, SWS_BILINEAR, NULL, NULL, NULL);

ret = av_image_alloc(out_data, out_linesize, src_frame->width, src_frame->height,
AV_PIX_FMT_RGB24, 1);
src_frame->height, out_format, SWS_BILINEAR, NULL, NULL, NULL);

if (ret < 0) {
return ret;
}


// is this (const uint8_t * const*) cast really correct?
ret = sws_scale(sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize, 0,
src_frame->height, out_data, out_linesize);
src_frame->height, (*dst_frame)->data, (*dst_frame)->linesize);

if (ret < 0) {
av_freep(&out_data[0]);
av_frame_free(dst_frame);
sws_freeContext(sws_ctx);
return ret;
}

sws_freeContext(sws_ctx);

return ret;
}
}
2 changes: 1 addition & 1 deletion c_src/xav/video_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
#include <libswscale/swscale.h>
#include <stdint.h>

int video_converter_convert(AVFrame *src_frame, uint8_t *out_data[4], int out_linesize[4]);
int video_converter_convert(AVFrame *src_frame, AVFrame **dst_frame, enum AVPixelFormat out_format);
16 changes: 10 additions & 6 deletions c_src/xav/xav_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ ERL_NIF_TERM new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
return xav_nif_raise(env, "failed_to_allocate_decoder");
}

if (decoder_init(xav_decoder->decoder, codec) != 0) {
if (decoder_init(xav_decoder->decoder, codec, xav_decoder->out_format) != 0) {
return xav_nif_raise(env, "failed_to_init_decoder");
}

Expand All @@ -79,17 +79,21 @@ ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame* fr
if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_VIDEO) {
XAV_LOG_DEBUG("Converting video to RGB");

uint8_t *out_data[4];
int out_linesize[4];
int out_pix_fmt = xav_decoder->decoder->out_format;

ret = video_converter_convert(frame, out_data, out_linesize);
if (out_pix_fmt == AV_PIX_FMT_NONE) {
return xav_nif_video_frame_to_term(env, frame);
}
Comment on lines +84 to +86
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should distinguish between the case where someone didn't pass out_format and the case where someone passed out_format but it was incorrect.

In decoder_init, we should check whether out_format is nil. If it is, set the out_format field in the decoder to null. If not, try to call av_get_pix_fmt. If av_get_pix_fmt returns AV_PIX_FMT_NONE, return an error.


AVFrame *dst_frame;
ret = video_converter_convert(frame, &dst_frame, out_pix_fmt);
if (ret <= 0) {
return xav_nif_raise(env, "failed_to_decode");
}

frame_term = xav_nif_video_frame_to_term(env, frame, out_data, out_linesize, "rgb");
frame_term = xav_nif_video_frame_to_term(env, dst_frame);

av_freep(&out_data[0]);
av_frame_free(&dst_frame);
} else if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_AUDIO) {
XAV_LOG_DEBUG("Converting audio to desired out format");

Expand Down
11 changes: 4 additions & 7 deletions c_src/xav/xav_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,15 @@ ERL_NIF_TERM next_frame(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
if (xav_reader->reader->media_type == AVMEDIA_TYPE_VIDEO) {
XAV_LOG_DEBUG("Converting video to RGB");

uint8_t *out_data[4];
int out_linesize[4];

ret = video_converter_convert(xav_reader->reader->frame, out_data, out_linesize);
AVFrame *dst_frame;
ret = video_converter_convert(xav_reader->reader->frame, &dst_frame, AV_PIX_FMT_RGB24);
if (ret <= 0) {
return xav_nif_raise(env, "failed_to_read");
}

frame_term =
xav_nif_video_frame_to_term(env, xav_reader->reader->frame, out_data, out_linesize, "rgb");
frame_term = xav_nif_video_frame_to_term(env, dst_frame);

av_freep(&out_data[0]);
av_frame_free(&dst_frame);
} else if (xav_reader->reader->media_type == AVMEDIA_TYPE_AUDIO) {
XAV_LOG_DEBUG("Converting audio to desired out format");

Expand Down
7 changes: 5 additions & 2 deletions lib/decoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,11 @@ defmodule Xav.Decoder do
[out_format: :f32]
```

Video frames are always returned in RGB format.
This setting cannot be changed.
or video samples format:

```elixir
[out_format: :rgb24]
```

Audio samples are always in the packed form -
samples from different channels are interleaved in the same, single binary:
Expand Down
10 changes: 8 additions & 2 deletions lib/frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@ defmodule Xav.Frame do
@typedoc """
Possible video frame formats.

Currently, only RGB is supported.
All `ffmpeg` pixel formats are accepted. For a complete list, run:

```sh
ffmpeg -pix_fmts
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I appreciate the instructions on how to get a list of possible formats!

```

An example of a pixel format is `:rgb24`.
"""
@type video_format() :: :rgb
@type video_format() :: atom()

@type format() :: audio_format() | video_format()

Expand Down
22 changes: 19 additions & 3 deletions test/decoder_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,11 @@ defmodule Xav.DecoderTest do
142, 204, 5, 106, 217, 175, 162, 62, 128, 161, 69, 136, 234, 30, 43, 165, 152,
104, 143>>

# Use ffmpeg to extract the first frame of the video
# ffmpeg -i sample_video.mp4 -c:v copy -f h264 -vframes 1 sample_h264.h264
@h264_frame File.read!("test/fixtures/decoder/sample_h264.h264")
# You can do the same for hevc given that the mp4 file contains a hevc stream
# ffmpeg -i sample_video.mp4 -c:v copy -f hevc -vframes 1 sample_h265.h265
@h265_frame File.read!("test/fixtures/decoder/sample_h265.h265")

test "new/0" do
Expand Down Expand Up @@ -323,8 +327,10 @@ defmodule Xav.DecoderTest do
test "video keyframe" do
decoder = Xav.Decoder.new(:vp8)

assert {:ok, %Xav.Frame{width: 640, height: 480, pts: 0, format: :rgb}} =
assert {:ok, %Xav.Frame{width: 640, height: 480, pts: 0, data: frame, format: :yuv420p}} =
Xav.Decoder.decode(decoder, @vp8_keyframe)

assert byte_size(frame) == 640 * 480 * 3 / 2
end

test "video without prior keyframe" do
Expand All @@ -338,7 +344,7 @@ defmodule Xav.DecoderTest do

assert :ok = Xav.Decoder.decode(decoder, @h264_frame)

assert {:ok, [%Xav.Frame{width: 1280, height: 720, pts: 0, format: :rgb}]} =
assert {:ok, [%Xav.Frame{width: 1280, height: 720, pts: 0, format: :yuv420p}]} =
Xav.Decoder.flush(decoder)
end

Expand All @@ -347,8 +353,18 @@ defmodule Xav.DecoderTest do

assert :ok = Xav.Decoder.decode(decoder, @h265_frame)

assert {:ok, [%Xav.Frame{width: 1920, height: 1080, pts: 0, format: :rgb}]} =
assert {:ok, [%Xav.Frame{width: 1920, height: 1080, pts: 0, format: :yuv420p}]} =
Xav.Decoder.flush(decoder)
end

test "convert video frame" do
decoder = Xav.Decoder.new(:vp8, out_format: :rgb24)

assert {:ok, %Xav.Frame{width: 640, height: 480, pts: 0, data: frame, format: :rgb24}} =
Xav.Decoder.decode(decoder, @vp8_keyframe)

assert byte_size(frame) == 640 * 480 * 3
end
end
end
end