elixir-webrtc · gBillal · Feb 4, 2025 · Feb 3, 2025 · Feb 3, 2025 · Feb 3, 2025
diff --git a/README.md b/README.md
@@ -36,7 +36,7 @@ decoder = Xav.Decoder.new(:vp8, out_format: :rgb24)
 Decode with audio resampling
 
 ```elixir
-decoder = Xav.Decoder.new(:opus, out_format: :f32, out_sample_rate: 16_000)
+decoder = Xav.Decoder.new(:opus, out_format: :flt, out_sample_rate: 16_000)
 {:ok, %Xav.Frame{} = frame} = Xav.Decoder.decode(decoder, <<"somebinary">>)
 ```
 
@@ -74,7 +74,7 @@ serving =
 # Read a couple of frames.
 # See https://hexdocs.pm/bumblebee/Bumblebee.Audio.WhisperFeaturizer.html for default sampling rate.
 frames =
-    Xav.Reader.stream!("sample.mp3", read: :audio, out_format: :f32, out_channels: 1, out_sample_rate: 16_000)
+    Xav.Reader.stream!("sample.mp3", read: :audio, out_format: :flt, out_channels: 1, out_sample_rate: 16_000)
     |> Stream.take(200)
     |> Enum.map(fn frame -> Xav.Frame.to_nx(frame) end)
 

diff --git a/c_src/xav/decoder.c b/c_src/xav/decoder.c
@@ -13,13 +13,9 @@ struct Decoder *decoder_alloc() {
   return decoder;
 }
 
-int decoder_init(struct Decoder *decoder, enum AVMediaType media_type, enum AVCodecID codec_id) {
-  decoder->media_type = media_type;
-  decoder->codec = avcodec_find_decoder(codec_id);
-
-  if (!decoder->codec) {
-    return -1;
-  }
+int decoder_init(struct Decoder *decoder, const AVCodec *codec) {
+  decoder->media_type = codec->type;
+  decoder->codec = codec;
 
   decoder->c = avcodec_alloc_context3(decoder->codec);
   if (!decoder->c) {

diff --git a/c_src/xav/decoder.h b/c_src/xav/decoder.h
@@ -16,7 +16,7 @@ struct Decoder {
 
 struct Decoder *decoder_alloc();
 
-int decoder_init(struct Decoder *decoder, enum AVMediaType media_type, enum AVCodecID codec_id);
+int decoder_init(struct Decoder *decoder, const AVCodec *codec);
 
 int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame);
 

diff --git a/c_src/xav/xav_decoder.c b/c_src/xav/xav_decoder.c
@@ -18,30 +18,22 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
   }
 
   ERL_NIF_TERM ret;
-  char *codec = NULL;
+  char *codec_name = NULL;
   char *out_format = NULL;
 
   // resolve codec
-  if (!xav_nif_get_atom(env, argv[0], &codec)) {
+  if (!xav_nif_get_atom(env, argv[0], &codec_name)) {
     return xav_nif_raise(env, "failed_to_get_atom");
   }
 
-  enum AVMediaType media_type;
-  enum AVCodecID codec_id;
-  if (strcmp(codec, "opus") == 0) {
-    media_type = AVMEDIA_TYPE_AUDIO;
-    codec_id = AV_CODEC_ID_OPUS;
-  } else if (strcmp(codec, "vp8") == 0) {
-    media_type = AVMEDIA_TYPE_VIDEO;
-    codec_id = AV_CODEC_ID_VP8;
-  } else if (strcmp(codec, "h264") == 0) {
-    media_type = AVMEDIA_TYPE_VIDEO;
-    codec_id = AV_CODEC_ID_H264;
-  } else if (strcmp(codec, "h265") == 0 || strcmp(codec, "hevc") == 0) {
-    media_type = AVMEDIA_TYPE_VIDEO;
-    codec_id = AV_CODEC_ID_HEVC;
-  } else {
-    ret = xav_nif_raise(env, "failed_to_resolve_codec");
+  const AVCodec *codec = avcodec_find_decoder_by_name(codec_name);
+  if (codec == NULL) {
+    ret = xav_nif_raise(env, "unknown_codec");
+    goto clean;
+  }
+
+  if (codec->type != AVMEDIA_TYPE_VIDEO && codec->type != AVMEDIA_TYPE_AUDIO) {
+    ret = xav_nif_raise(env, "unsupported_media_type");
     goto clean;
   }
 
@@ -53,13 +45,13 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
 
   enum AVPixelFormat out_video_fmt = AV_PIX_FMT_NONE;
   enum AVSampleFormat out_audo_fmt = AV_SAMPLE_FMT_NONE;
-  if (media_type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
+  if (codec->type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) {
     out_video_fmt = av_get_pix_fmt(out_format);
     if (out_video_fmt == AV_PIX_FMT_NONE) {
       ret = xav_nif_raise(env, "unknown_out_format");
       goto clean;
     }
-  } else if (media_type == AVMEDIA_TYPE_AUDIO && strcmp(out_format, "nil") != 0) {
+  } else if (codec->type == AVMEDIA_TYPE_AUDIO && strcmp(out_format, "nil") != 0) {
     out_audo_fmt = av_get_sample_fmt(out_format);
     if (out_audo_fmt == AV_SAMPLE_FMT_NONE) {
       ret = xav_nif_raise(env, "unknown_out_format");
@@ -110,7 +102,7 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
     goto clean;
   }
 
-  if (decoder_init(xav_decoder->decoder, media_type, codec_id) != 0) {
+  if (decoder_init(xav_decoder->decoder, codec) != 0) {
     ret = xav_nif_raise(env, "failed_to_init_decoder");
     goto clean;
   }
@@ -119,8 +111,8 @@ ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
   enif_release_resource(xav_decoder);
 
 clean:
-  if (codec != NULL)
-    XAV_FREE(codec);
+  if (codec_name != NULL)
+    XAV_FREE(codec_name);
   if (out_format != NULL)
     XAV_FREE(out_format);
 
@@ -267,6 +259,61 @@ ERL_NIF_TERM flush(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
   return xav_nif_ok(env, enif_make_list_from_array(env, frame_terms, frames_count));
 }
 
+// Lists FFmpeg pixel formats as {name, nb_components, hwaccel?}, in reverse iteration order.
+ERL_NIF_TERM pixel_formats(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
+  ERL_NIF_TERM formats = enif_make_list(env, 0);
+  const AVPixFmtDescriptor *d;
+
+  for (d = av_pix_fmt_desc_next(NULL); d != NULL; d = av_pix_fmt_desc_next(d)) {
+    const char *hwaccel = "false";
+    if (d->flags & AV_PIX_FMT_FLAG_HWACCEL)
+      hwaccel = "true";
+
+    ERL_NIF_TERM entry = enif_make_tuple3(env, enif_make_atom(env, d->name),
+                                          enif_make_int(env, d->nb_components),
+                                          enif_make_atom(env, hwaccel));
+    formats = enif_make_list_cell(env, entry, formats);
+  }
+  return formats;
+}
+
+// Lists FFmpeg sample formats as {name, bytes_per_sample}, in reverse enum order.
+ERL_NIF_TERM sample_formats(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
+  ERL_NIF_TERM result = enif_make_list(env, 0);
+  const char *name = NULL;
+
+  // Stop at the first format the linked libavutil cannot name instead of bounding the loop
+  // with AV_SAMPLE_FMT_NB: FFmpeg documents that constant as unsafe with dynamic linking,
+  // since the header's count may differ from the library's at runtime.
+  for (int fmt = 0; (name = av_get_sample_fmt_name((enum AVSampleFormat)fmt)) != NULL; fmt++) {
+    int nb_bytes = av_get_bytes_per_sample((enum AVSampleFormat)fmt);
+    ERL_NIF_TERM desc =
+        enif_make_tuple2(env, enif_make_atom(env, name), enif_make_int(env, nb_bytes));
+    result = enif_make_list_cell(env, desc, result);
+  }
+  return result;
+}
+
+// Lists FFmpeg decoders as {name, long_name, media_type}, in reverse iteration order.
+ERL_NIF_TERM list_decoders(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
+  ERL_NIF_TERM result = enif_make_list(env, 0);
+  const AVCodec *codec = NULL;
+  void *iter = NULL;
+  while ((codec = av_codec_iterate(&iter))) {
+    if (!av_codec_is_decoder(codec))
+      continue;
+    // long_name may be NULL (e.g. FFmpeg built with --enable-small) and the media type string
+    // is NULL for unknown types; enif_make_string/enif_make_atom need a valid C string.
+    const char *long_name = codec->long_name ? codec->long_name : "";
+    const char *type = av_get_media_type_string(codec->type);
+    ERL_NIF_TERM desc = enif_make_tuple3(env, enif_make_atom(env, codec->name),
+                                         enif_make_string(env, long_name, ERL_NIF_LATIN1),
+                                         enif_make_atom(env, type ? type : "unknown"));
+    result = enif_make_list_cell(env, desc, result);
+  }
+  return result;
+}
+
 static int init_audio_converter(struct XavDecoder *xav_decoder) {
   xav_decoder->ac = audio_converter_alloc();
 
@@ -345,7 +392,10 @@ void free_xav_decoder(ErlNifEnv *env, void *obj) {
 
 static ErlNifFunc xav_funcs[] = {{"new", 6, new},
                                  {"decode", 4, decode, ERL_NIF_DIRTY_JOB_CPU_BOUND},
-                                 {"flush", 1, flush, ERL_NIF_DIRTY_JOB_CPU_BOUND}};
+                                 {"flush", 1, flush, ERL_NIF_DIRTY_JOB_CPU_BOUND},
+                                 {"pixel_formats", 0, pixel_formats},
+                                 {"sample_formats", 0, sample_formats},
+                                 {"list_decoders", 0, list_decoders}};
 
 static int load(ErlNifEnv *env, void **priv, ERL_NIF_TERM load_info) {
   xav_decoder_resource_type =

diff --git a/lib/xav.ex b/lib/xav.ex
@@ -0,0 +1,41 @@
+defmodule Xav do
+  @external_resource "README.md"
+  @moduledoc File.read!("README.md")
+
+  @doc """
+  Get all available pixel formats.
+
+  The result is a list of 3-element tuples `{name, nb_components, hw_accelerated_format?}`:
+    * `name` - The name of the pixel format.
+    * `nb_components` - The number of components in the pixel format.
+    * `hw_accelerated_format?` - Whether the pixel format is a hardware accelerated format.
+  """
+  @spec pixel_formats() :: [{atom(), integer(), boolean()}]
+  def pixel_formats(), do: Enum.reverse(Xav.Decoder.NIF.pixel_formats())
+
+  @doc """
+  Get all available audio sample formats.
+
+  The result is a list of 2-element tuples `{name, nb_bytes}`:
+    * `name` - The name of the sample format.
+    * `nb_bytes` - The number of bytes per sample.
+  """
+  @spec sample_formats() :: [{atom(), integer()}]
+  def sample_formats(), do: Enum.reverse(Xav.Decoder.NIF.sample_formats())
+
+  @doc """
+  List all decoders.
+
+  The result is a list of 3-element tuples `{name, long_name, media_type}`:
+    * `name` - The short name of the decoder.
+    * `long_name` - The long name of the decoder.
+    * `media_type` - The media type of the decoder.
+  """
+  @spec list_decoders() :: [{name :: atom(), long_name :: String.t(), media_type :: atom()}]
+  def list_decoders() do
+    # One fold both restores the NIF's reversed order and turns charlists into strings.
+    Enum.reduce(Xav.Decoder.NIF.list_decoders(), [], fn {name, long_name, media_type}, acc ->
+      [{name, List.to_string(long_name), media_type} | acc]
+    end)
+  end
+end
diff --git a/lib/decoder.ex → lib/xav/decoder.ex b/lib/decoder.ex → lib/xav/decoder.ex
@@ -5,65 +5,87 @@ defmodule Xav.Decoder do
 
   @typedoc """
   Supported codecs.
+
+  To get the list of available decoders see `Xav.list_decoders/0`.
   """
-  @type codec() :: :opus | :vp8 | :h264 | :h265
+  @type codec() :: atom()
 
   @type t() :: reference()
 
-  @typedoc """
-  Opts that can be passed to `new/2`.
-  """
-  @type opts :: [
-          out_format: Xav.Frame.format(),
-          out_sample_rate: integer(),
-          out_channels: integer(),
-          out_width: Xav.Frame.width(),
-          out_height: Xav.Frame.height()
-        ]
+  @decoder_options_schema [
+    out_format: [
+      type: :atom,
+      doc: """
+      Output format of the samples.
+
+      In case of video, it's the pixel format. In case of audio, it's the sample format.
+
+      To get the list of supported pixel formats use `Xav.pixel_formats/0`,
+      and for sample formats `Xav.sample_formats/0`.
+      """
+    ],
+    out_sample_rate: [
+      type: :pos_integer,
+      doc: """
+      Audio sample rate.
+
+      If not specified, the sample rate of the input stream will be used.
+      """
+    ],
+    out_channels: [
+      type: :pos_integer,
+      doc: """
+      Number of audio channels.
+
+      If not specified, the number of channels of the input stream will be used.
+
+      Audio samples are always in the packed form -
+      samples from different channels are interleaved in the same, single binary:
+
+      ```
+      <<c10, c20, c30, c11, c21, c31, c12, c22, c32>>
+      ```
+
+      An alternative would be to return a list of binaries, where
+      each binary represents different channel:
+      ```
+      [
+        <<c10, c11, c12, c13, c14>>,
+        <<c20, c21, c22, c23, c24>>,
+        <<c30, c31, c32, c33, c34>>
+      ]
+      ```
+      """
+    ],
+    out_width: [
+      type: :pos_integer,
+      doc: "Scale the output video frame to the provided width."
+    ],
+    out_height: [
+      type: :pos_integer,
+      doc: "Scale the output video frame to the provided height."
+    ]
+  ]
 
   @doc """
   Creates a new decoder.
 
-  `opts` can be used to specify desired output parameters.
-
-  E.g. if you want to change audio samples format just pass:
-
-  ```elixir
-  [out_format: :f32]
-  ```
+  `codec` is any audio/video decoder supported by `FFmpeg`.
 
-  or video samples format:
-
-  ```elixir
-  [out_format: :rgb24]
-  ```
-
-  Audio samples are always in the packed form -
-  samples from different channels are interleaved in the same, single binary:
-
-  ```
-  <<c10, c20, c30, c11, c21, c31, c12, c22, c32>>
-  ```
-
-  An alternative would be to return a list of binaries, where
-  each binary represents different channel:
-
-  ```
-  [
-    <<c10, c11, c12, c13, c14>>,
-    <<c20, c21, c22, c23, c24>>,
-    <<c30, c31, c32, c33, c34>>
-  ]
-  ```
+  `opts` can be used to specify desired output parameters:\n#{NimbleOptions.docs(@decoder_options_schema)}
   """
-  @spec new(codec(), opts()) :: t()
-  def new(codec, opts \\ []) do
-    out_format = opts[:out_format]
-    out_sample_rate = opts[:out_sample_rate] || 0
-    out_channels = opts[:out_channels] || 0
-    out_width = opts[:out_width] || -1
-    out_height = opts[:out_height] || -1
-    Xav.Decoder.NIF.new(codec, out_format, out_sample_rate, out_channels, out_width, out_height)
+  @spec new(codec(), Keyword.t()) :: t()
+  def new(codec, opts \\ []) when is_atom(codec) do
+    # Raises `NimbleOptions.ValidationError` when `opts` does not match the schema.
+    validated = NimbleOptions.validate!(opts, @decoder_options_schema)
+
+    # Unset options reach the NIF as `nil` (format), 0 (audio) or -1 (video size).
+    out_format = Keyword.get(validated, :out_format)
+    out_sample_rate = Keyword.get(validated, :out_sample_rate) || 0
+    out_channels = Keyword.get(validated, :out_channels) || 0
+    out_width = Keyword.get(validated, :out_width) || -1
+    out_height = Keyword.get(validated, :out_height) || -1
+    Xav.Decoder.NIF.new(codec, out_format, out_sample_rate, out_channels, out_width, out_height)
+  end
 
   @doc """

diff --git a/lib/decoder_nif.ex → lib/xav/decoder_nif.ex b/lib/decoder_nif.ex → lib/xav/decoder_nif.ex
@@ -15,4 +15,10 @@ defmodule Xav.Decoder.NIF do
   def decode(_decoder, _data, _pts, _dts), do: :erlang.nif_error(:undef)
 
   def flush(_decoder), do: :erlang.nif_error(:undef)
+  # Stubs for the zero-arity NIFs registered in `xav_funcs` (c_src/xav/xav_decoder.c).
+  def pixel_formats(), do: :erlang.nif_error(:undef)
+
+  def sample_formats(), do: :erlang.nif_error(:undef)
+
+  def list_decoders(), do: :erlang.nif_error(:undef)
 end
diff --git a/lib/encoder.ex → lib/xav/encoder.ex b/lib/encoder.ex → lib/xav/encoder.ex
diff --git a/lib/encoder_nif.ex → lib/xav/encoder_nif.ex b/lib/encoder_nif.ex → lib/xav/encoder_nif.ex
diff --git a/lib/frame.ex → lib/xav/frame.ex b/lib/frame.ex → lib/xav/frame.ex
diff --git a/lib/packet.ex → lib/xav/packet.ex b/lib/packet.ex → lib/xav/packet.ex
diff --git a/lib/reader.ex → lib/xav/reader.ex b/lib/reader.ex → lib/xav/reader.ex
diff --git a/lib/reader_nif.ex → lib/xav/reader_nif.ex b/lib/reader_nif.ex → lib/xav/reader_nif.ex
diff --git a/lib/video_converter.ex → lib/xav/video_converter.ex b/lib/video_converter.ex → lib/xav/video_converter.ex
diff --git a/lib/video_converter_nif.ex → lib/xav/video_converter_nif.ex b/lib/video_converter_nif.ex → lib/xav/video_converter_nif.ex
diff --git a/test/decoder_test.exs b/test/decoder_test.exs
@@ -348,8 +348,8 @@ defmodule Xav.DecoderTest do
                Xav.Decoder.flush(decoder)
     end
 
-    test "h265 video" do
-      decoder = Xav.Decoder.new(:h265)
+    test "hevc video" do
+      decoder = Xav.Decoder.new(:hevc)
 
       assert :ok = Xav.Decoder.decode(decoder, @h265_frame)