Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ decoder = Xav.Decoder.new(:vp8, out_format: :rgb24)
Decode with audio resampling

```elixir
decoder = Xav.Decoder.new(:opus, out_format: :f32, out_sample_rate: 16_000)
decoder = Xav.Decoder.new(:opus, out_format: :flt, out_sample_rate: 16_000)
{:ok, %Xav.Frame{} = frame} = Xav.Decoder.decode(decoder, <<"somebinary">>)
```

Expand Down Expand Up @@ -74,7 +74,7 @@ serving =
# Read a couple of frames.
# See https://hexdocs.pm/bumblebee/Bumblebee.Audio.WhisperFeaturizer.html for default sampling rate.
frames =
Xav.Reader.stream!("sample.mp3", read: :audio, out_format: :f32, out_channels: 1, out_sample_rate: 16_000)
Xav.Reader.stream!("sample.mp3", read: :audio, out_format: :flt, out_channels: 1, out_sample_rate: 16_000)
|> Stream.take(200)
|> Enum.map(fn frame -> Xav.Frame.to_nx(frame) end)

Expand Down
10 changes: 3 additions & 7 deletions c_src/xav/decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,9 @@ struct Decoder *decoder_alloc() {
return decoder;
}

int decoder_init(struct Decoder *decoder, enum AVMediaType media_type, enum AVCodecID codec_id) {
decoder->media_type = media_type;
decoder->codec = avcodec_find_decoder(codec_id);

if (!decoder->codec) {
return -1;
}
int decoder_init(struct Decoder *decoder, const AVCodec *codec) {
decoder->media_type = codec->type;
decoder->codec = codec;

decoder->c = avcodec_alloc_context3(decoder->codec);
if (!decoder->c) {
Expand Down
2 changes: 1 addition & 1 deletion c_src/xav/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct Decoder {

struct Decoder *decoder_alloc();

int decoder_init(struct Decoder *decoder, enum AVMediaType media_type, enum AVCodecID codec_id);
int decoder_init(struct Decoder *decoder, const AVCodec *codec);

int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame);

Expand Down
98 changes: 74 additions & 24 deletions c_src/xav/xav_decoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,30 +18,22 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
}

ERL_NIF_TERM ret;
char *codec = NULL;
char *codec_name = NULL;
char *out_format = NULL;

// resolve codec
if (!xav_nif_get_atom(env, argv[0], &codec)) {
if (!xav_nif_get_atom(env, argv[0], &codec_name)) {
return xav_nif_raise(env, "failed_to_get_atom");
}

enum AVMediaType media_type;
enum AVCodecID codec_id;
if (strcmp(codec, "opus") == 0) {
media_type = AVMEDIA_TYPE_AUDIO;
codec_id = AV_CODEC_ID_OPUS;
} else if (strcmp(codec, "vp8") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_VP8;
} else if (strcmp(codec, "h264") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_H264;
} else if (strcmp(codec, "h265") == 0 || strcmp(codec, "hevc") == 0) {
media_type = AVMEDIA_TYPE_VIDEO;
codec_id = AV_CODEC_ID_HEVC;
} else {
ret = xav_nif_raise(env, "failed_to_resolve_codec");
const AVCodec *codec = avcodec_find_decoder_by_name(codec_name);
if (codec == NULL) {
ret = xav_nif_raise(env, "unknown_codec");
goto clean;
}

if (codec->type != AVMEDIA_TYPE_VIDEO && codec->type != AVMEDIA_TYPE_AUDIO) {
ret = xav_nif_raise(env, "unsupported_media_type");
goto clean;
}

Expand All @@ -53,13 +45,13 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {

enum AVPixelFormat out_video_fmt = AV_PIX_FMT_NONE;
enum AVSampleFormat out_audo_fmt = AV_SAMPLE_FMT_NONE;
if (media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
if (codec->type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
out_video_fmt = av_get_pix_fmt(out_format);
if (out_video_fmt == AV_PIX_FMT_NONE) {
ret = xav_nif_raise(env, "unknown_out_format");
goto clean;
}
} else if (media_type == AVMEDIA_TYPE_AUDIO && strcmp(out_format, "nil") != 0) {
} else if (codec->type == AVMEDIA_TYPE_AUDIO && strcmp(out_format, "nil") != 0) {
out_audo_fmt = av_get_sample_fmt(out_format);
if (out_audo_fmt == AV_SAMPLE_FMT_NONE) {
ret = xav_nif_raise(env, "unknown_out_format");
Expand Down Expand Up @@ -110,7 +102,7 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
goto clean;
}

if (decoder_init(xav_decoder->decoder, media_type, codec_id) != 0) {
if (decoder_init(xav_decoder->decoder, codec) != 0) {
ret = xav_nif_raise(env, "failed_to_init_decoder");
goto clean;
}
Expand All @@ -119,8 +111,8 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
enif_release_resource(xav_decoder);

clean:
if (codec != NULL)
XAV_FREE(codec);
if (codec_name != NULL)
XAV_FREE(codec_name);
if (out_format != NULL)
XAV_FREE(out_format);

Expand Down Expand Up @@ -267,6 +259,61 @@ ERL_NIF_TERM flush(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
return xav_nif_ok(env, enif_make_list_from_array(env, frame_terms, frames_count));
}

/*
 * NIF: enumerates the pixel format descriptors exposed by
 * av_pix_fmt_desc_next() and returns them as a list of
 * `{name, nb_components, is_hwaccel}` tuples, where `name` is an atom,
 * `nb_components` an integer and `is_hwaccel` the atom `true` or `false`
 * depending on whether AV_PIX_FMT_FLAG_HWACCEL is set in the descriptor flags.
 *
 * Every tuple is prepended to the list, so the returned list is in the
 * reverse of the iteration order.
 */
ERL_NIF_TERM pixel_formats(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
  ERL_NIF_TERM formats = enif_make_list(env, 0);

  for (const AVPixFmtDescriptor *d = av_pix_fmt_desc_next(NULL); d != NULL;
       d = av_pix_fmt_desc_next(d)) {
    const char *hwaccel_flag = "false";
    if (d->flags & AV_PIX_FMT_FLAG_HWACCEL) {
      hwaccel_flag = "true";
    }

    ERL_NIF_TERM fmt_name = enif_make_atom(env, d->name);
    ERL_NIF_TERM components = enif_make_int(env, d->nb_components);
    ERL_NIF_TERM hwaccel = enif_make_atom(env, hwaccel_flag);
    ERL_NIF_TERM entry = enif_make_tuple3(env, fmt_name, components, hwaccel);

    formats = enif_make_list_cell(env, entry, formats);
  }

  return formats;
}

/*
 * NIF: returns one `{name, nb_bytes}` tuple for every sample format index in
 * the range [0, AV_SAMPLE_FMT_NB). `name` is an atom built from
 * av_get_sample_fmt_name() and `nb_bytes` is the integer reported by
 * av_get_bytes_per_sample().
 *
 * Every tuple is prepended to the list, so the format with the highest index
 * ends up at the head of the returned list.
 */
ERL_NIF_TERM sample_formats(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
  ERL_NIF_TERM formats = enif_make_list(env, 0);
  int idx = 0;

  while (idx < AV_SAMPLE_FMT_NB) {
    enum AVSampleFormat current = (enum AVSampleFormat)idx;

    ERL_NIF_TERM name_atom = enif_make_atom(env, av_get_sample_fmt_name(current));
    ERL_NIF_TERM bytes_per_sample = enif_make_int(env, av_get_bytes_per_sample(current));
    ERL_NIF_TERM entry = enif_make_tuple2(env, name_atom, bytes_per_sample);

    formats = enif_make_list_cell(env, entry, formats);
    idx++;
  }

  return formats;
}

/*
 * NIF: returns a list of `{name, long_name, media_type}` tuples, one for every
 * codec yielded by av_codec_iterate() for which av_codec_is_decoder() is true.
 *
 *   - `name`       - atom built from the codec short name,
 *   - `long_name`  - Latin-1 character list (empty when the codec has no long name),
 *   - `media_type` - atom built from av_get_media_type_string(), or `unknown`
 *                    when that function has no string for the codec type.
 *
 * Every tuple is prepended to the list, so the returned list is in the
 * reverse of the iteration order.
 */
ERL_NIF_TERM list_decoders(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
  ERL_NIF_TERM result = enif_make_list(env, 0);

  const AVCodec *codec = NULL;
  void *iter = NULL;

  while ((codec = av_codec_iterate(&iter))) {
    if (!av_codec_is_decoder(codec)) {
      continue;
    }

    // long_name is optional in AVCodec; enif_make_string() must never be
    // handed a NULL pointer, so fall back to an empty string.
    const char *long_name_str = codec->long_name != NULL ? codec->long_name : "";

    // av_get_media_type_string() returns NULL for an unknown media type;
    // enif_make_atom() must never be handed a NULL pointer either.
    const char *media_type_str = av_get_media_type_string(codec->type);
    if (media_type_str == NULL) {
      media_type_str = "unknown";
    }

    ERL_NIF_TERM name = enif_make_atom(env, codec->name);
    ERL_NIF_TERM long_name = enif_make_string(env, long_name_str, ERL_NIF_LATIN1);
    ERL_NIF_TERM media_type = enif_make_atom(env, media_type_str);

    ERL_NIF_TERM desc = enif_make_tuple3(env, name, long_name, media_type);
    result = enif_make_list_cell(env, desc, result);
  }

  return result;
}

static int init_audio_converter(struct XavDecoder *xav_decoder) {
xav_decoder->ac = audio_converter_alloc();

Expand Down Expand Up @@ -345,7 +392,10 @@ void free_xav_decoder(ErlNifEnv *env, void *obj) {

static ErlNifFunc xav_funcs[] = {{"new", 6, new},
{"decode", 4, decode, ERL_NIF_DIRTY_JOB_CPU_BOUND},
{"flush", 1, flush, ERL_NIF_DIRTY_JOB_CPU_BOUND}};
{"flush", 1, flush, ERL_NIF_DIRTY_JOB_CPU_BOUND},
{"pixel_formats", 0, pixel_formats},
{"sample_formats", 0, sample_formats},
{"list_decoders", 0, list_decoders}};

static int load(ErlNifEnv *env, void **priv, ERL_NIF_TERM load_info) {
xav_decoder_resource_type =
Expand Down
41 changes: 41 additions & 0 deletions lib/xav.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
defmodule Xav do
  # The README is read at compile time; registering it as an external resource
  # makes Mix recompile this module whenever the README changes.
  @external_resource "README.md"
  @moduledoc File.read!("README.md")

  @doc """
  Get all available pixel formats.

  The result is a list of 3-element tuples `{name, nb_components, hw_accelerated_format?}`:
    * `name` - The name of the pixel format.
    * `nb_components` - The number of components in the pixel format.
    * `hw_accelerated_format?` - Whether the pixel format is a hardware accelerated format.
  """
  @spec pixel_formats() :: [{atom(), integer(), boolean()}]
  def pixel_formats(), do: Enum.reverse(Xav.Decoder.NIF.pixel_formats())

  @doc """
  Get all available audio sample formats.

  The result is a list of 2-element tuples `{name, nb_bytes}`:
    * `name` - The name of the sample format.
    * `nb_bytes` - The number of bytes per sample.
  """
  @spec sample_formats() :: [{atom(), integer()}]
  def sample_formats(), do: Enum.reverse(Xav.Decoder.NIF.sample_formats())

  @doc """
  List all decoders.

  The result is a list of 3-element tuples `{name, long_name, media_type}`:
    * `name` - The short name of the decoder.
    * `long_name` - The long name of the decoder.
    * `media_type` - The media type of the decoder.
  """
  @spec list_decoders() :: [{name :: atom(), long_name :: String.t(), media_type :: atom()}]
  def list_decoders() do
    # A prepending reduce converts each charlist long name to a binary and
    # reverses the list in a single pass (equivalent to map followed by reverse).
    Enum.reduce(Xav.Decoder.NIF.list_decoders(), [], fn {name, long_name, media_type}, acc ->
      [{name, List.to_string(long_name), media_type} | acc]
    end)
  end
end
120 changes: 71 additions & 49 deletions lib/decoder.ex → lib/xav/decoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,65 +5,87 @@ defmodule Xav.Decoder do

@typedoc """
Supported codecs.

To get the list of available decoders see `Xav.list_decoders/0`.
"""
@type codec() :: :opus | :vp8 | :h264 | :h265
@type codec() :: atom()

@type t() :: reference()

@typedoc """
Opts that can be passed to `new/2`.
"""
@type opts :: [
out_format: Xav.Frame.format(),
out_sample_rate: integer(),
out_channels: integer(),
out_width: Xav.Frame.width(),
out_height: Xav.Frame.height()
]
@decoder_options_schema [
out_format: [
type: :atom,
doc: """
Output format of the samples.

In case of video, it's the pixel format. In case of audio, it's the sample format.

To get the list of supported pixel formats use `Xav.pixel_formats/0`,
and for sample formats `Xav.sample_formats/0`.
"""
],
out_sample_rate: [
type: :pos_integer,
doc: """
Audio sample rate.

If not specified, the sample rate of the input stream will be used.
"""
],
out_channels: [
type: :pos_integer,
doc: """
Number of audio channels.

If not specified, the number of channels of the input stream will be used.

Audio samples are always in the packed form -
samples from different channels are interleaved in the same, single binary:

```
<<c10, c20, c30, c11, c21, c31, c12, c22, c32>>
```

An alternative would be to return a list of binaries, where
each binary represents different channel:
```
[
<<c10, c11, c12, c13, c14>>,
<<c20, c21, c22, c23, c24>>,
<<c30, c31, c32, c33, c34>>
]
```
"""
],
out_width: [
type: :pos_integer,
doc: "Scale the output video frame to the provided width."
],
out_height: [
type: :pos_integer,
doc: "Scale the output video frame to the provided height."
]
]

@doc """
Creates a new decoder.

`opts` can be used to specify desired output parameters.

E.g. if you want to change audio samples format just pass:

```elixir
[out_format: :f32]
```
`codec` is any audio/video decoder supported by `FFmpeg`.

or video samples format:

```elixir
[out_format: :rgb24]
```

Audio samples are always in the packed form -
samples from different channels are interleaved in the same, single binary:

```
<<c10, c20, c30, c11, c21, c31, c12, c22, c32>>
```

An alternative would be to return a list of binaries, where
each binary represents different channel:

```
[
<<c10, c11, c12, c13, c14>>,
<<c20, c21, c22, c23, c24>>,
<<c30, c31, c32, c33, c34>>
]
```
`opts` can be used to specify desired output parameters:\n#{NimbleOptions.docs(@decoder_options_schema)}
"""
@spec new(codec(), opts()) :: t()
def new(codec, opts \\ []) do
out_format = opts[:out_format]
out_sample_rate = opts[:out_sample_rate] || 0
out_channels = opts[:out_channels] || 0
out_width = opts[:out_width] || -1
out_height = opts[:out_height] || -1
Xav.Decoder.NIF.new(codec, out_format, out_sample_rate, out_channels, out_width, out_height)
@spec new(codec(), Keyword.t()) :: t()
def new(codec, opts \\ []) when is_atom(codec) do
opts = NimbleOptions.validate!(opts, @decoder_options_schema)

Xav.Decoder.NIF.new(
codec,
opts[:out_format],
opts[:out_sample_rate] || 0,
opts[:out_channels] || 0,
opts[:out_width] || -1,
opts[:out_height] || -1
)
end

@doc """
Expand Down
6 changes: 6 additions & 0 deletions lib/decoder_nif.ex → lib/xav/decoder_nif.ex
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,10 @@ defmodule Xav.Decoder.NIF do
def decode(_decoder, _data, _pts, _dts), do: :erlang.nif_error(:undef)

def flush(_decoder), do: :erlang.nif_error(:undef)

def pixel_formats(), do: :erlang.nif_error(:undef)

def sample_formats(), do: :erlang.nif_error(:undef)

def list_decoders(), do: :erlang.nif_error(:undef)
end
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions test/decoder_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,8 @@ defmodule Xav.DecoderTest do
Xav.Decoder.flush(decoder)
end

test "h265 video" do
decoder = Xav.Decoder.new(:h265)
test "hevc video" do
decoder = Xav.Decoder.new(:hevc)

assert :ok = Xav.Decoder.decode(decoder, @h265_frame)

Expand Down
Loading