Skip to content

Panic reading avro file at datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:771:37 #1785

@joshuarobinson

Description

@joshuarobinson

Describe the bug
When trying to use "read_avro()" or "register_avro()" with DataFusion 6.0 (with the "avro" feature enabled) and a certain Avro file, I consistently get a panic. Other Avro files with different schemas have been okay.
thread 'main' panicked at 'expected struct got None', /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:771:37
The avro file is correctly decoded when using avro-tools-1.11.0.jar.

To Reproduce
I have simplified the logic to the following test program: https://gist.github.com/joshuarobinson/413536d5affd751eb9d8958a970e8b04

I'm also attaching a link to the 6 KB Avro file that causes the problem.

Expected behavior
The contents of the Avro file should be printed using basic DataFusion "df.collect.show"-style logic.

Additional context
The problematic avro file is part of Apache Iceberg metadata, fwiw.

Stacktrace:

stack backtrace:
   0: rust_begin_unwind
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/std/src/panicking.rs:517:5
   1: core::panicking::panic_fmt
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/panicking.rs:101:14
   2: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::build_struct_array::{{closure}}::{{closure}}
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:771:37
   3: core::iter::adapters::map::map_fold::{{closure}}
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:84:28
   4: core::iter::adapters::map::map_fold::{{closure}}
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:84:21
   5: <core::iter::adapters::enumerate::Enumerate<I> as core::iter::traits::iterator::Iterator>::fold::enumerate::{{closure}}
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/enumerate.rs:106:27
   6: core::iter::traits::iterator::Iterator::fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:2170:21
   7: <core::iter::adapters::enumerate::Enumerate<I> as core::iter::traits::iterator::Iterator>::fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/enumerate.rs:112:9
   8: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:124:9
   9: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:124:9
  10: core::iter::traits::iterator::Iterator::for_each
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:733:9
  11: <alloc::vec::Vec<T,A> as alloc::vec::spec_extend::SpecExtend<T,I>>::spec_extend
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_extend.rs:40:17
  12: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_from_iter_nested.rs:56:9
  13: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_from_iter.rs:33:9
  14: <alloc::vec::Vec<T> as core::iter::traits::collect::FromIterator<T>>::from_iter
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/mod.rs:2486:9
  15: core::iter::traits::iterator::Iterator::collect
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1745:9
  16: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::build_struct_array::{{closure}}
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:762:43
  17: core::iter::adapters::map::map_try_fold::{{closure}}
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:91:28
  18: core::iter::adapters::filter::filter_try_fold::{{closure}}
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/filter.rs:44:44
  19: core::iter::traits::iterator::Iterator::try_fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1994:21
  20: <core::iter::adapters::filter::Filter<I,P> as core::iter::traits::iterator::Iterator>::try_fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/filter.rs:93:9
  21: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::try_fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:117:9
  22: <core::iter::adapters::ResultShunt<I,E> as core::iter::traits::iterator::Iterator>::try_fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/mod.rs:177:9
  23: core::iter::traits::iterator::Iterator::find
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:2381:9
  24: <core::iter::adapters::ResultShunt<I,E> as core::iter::traits::iterator::Iterator>::next
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/mod.rs:159:9
  25: alloc::vec::Vec<T,A>::extend_desugared
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/mod.rs:2583:35
  26: <alloc::vec::Vec<T,A> as alloc::vec::spec_extend::SpecExtend<T,I>>::spec_extend
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_extend.rs:18:9
  27: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_from_iter_nested.rs:37:9
  28: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_from_iter.rs:33:9
  29: <alloc::vec::Vec<T> as core::iter::traits::collect::FromIterator<T>>::from_iter
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/mod.rs:2486:9
  30: core::iter::traits::iterator::Iterator::collect
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1745:9
  31: <core::result::Result<V,E> as core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter::{{closure}}
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/result.rs:1887:53
  32: core::iter::adapters::process_results
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/mod.rs:148:17
  33: <core::result::Result<V,E> as core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/result.rs:1887:9
  34: core::iter::traits::iterator::Iterator::collect
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1745:9
  35: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::build_struct_array
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:610:50
  36: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::build_struct_array::{{closure}}
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:776:29
  37: core::iter::adapters::map::map_try_fold::{{closure}}
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:91:28
  38: core::iter::adapters::filter::filter_try_fold::{{closure}}
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/filter.rs:44:44
  39: core::iter::traits::iterator::Iterator::try_fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1994:21
  40: <core::iter::adapters::filter::Filter<I,P> as core::iter::traits::iterator::Iterator>::try_fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/filter.rs:93:9
  41: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::try_fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:117:9
  42: <core::iter::adapters::ResultShunt<I,E> as core::iter::traits::iterator::Iterator>::try_fold
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/mod.rs:177:9
  43: core::iter::traits::iterator::Iterator::find
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:2381:9
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1745:9
  55: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::build_struct_array
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:610:50
  56: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::next_batch
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:119:13
  57: datafusion::avro_to_arrow::reader::Reader<R>::next
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/reader.rs:159:9
  58: <datafusion::avro_to_arrow::reader::Reader<R> as core::iter::traits::iterator::Iterator>::next
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/reader.rs:167:9
  59: <alloc::boxed::Box<I,A> as core::iter::traits::iterator::Iterator>::next
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/boxed.rs:1570:9
  60: datafusion::physical_plan::file_format::file_stream::FileStream<F>::next_batch
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/file_format/file_stream.rs:116:15
  61: datafusion::physical_plan::file_format::file_stream::FileStream<F>::next_batch::{{closure}}
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/file_format/file_stream.rs:130:29
  62: core::result::Result<T,E>::and_then
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/result.rs:966:22
  63: datafusion::physical_plan::file_format::file_stream::FileStream<F>::next_batch
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/file_format/file_stream.rs:124:21
  64: <datafusion::physical_plan::file_format::file_stream::FileStream<F> as futures_core::stream::Stream>::poll_next
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/file_format/file_stream.rs:150:40
  65: <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-core-0.3.21/src/stream.rs:120:9
  66: <S as futures_core::stream::TryStream>::try_poll_next
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-core-0.3.21/src/stream.rs:196:9
  67: <futures_util::stream::try_stream::try_collect::TryCollect<St,C> as core::future::future::Future>::poll
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-util-0.3.21/src/stream/try_stream/try_collect.rs:46:26
  68: datafusion::physical_plan::common::collect::{{closure}}
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/common.rs:79:5
  69: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/future/mod.rs:80:19
  70: datafusion::physical_plan::collect::{{closure}}
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/mod.rs:315:5
  71: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/future/mod.rs:80:19
  72: <datafusion::execution::dataframe_impl::DataFrameImpl as datafusion::dataframe::DataFrame>::collect::{{closure}}
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/execution/dataframe_impl.rs:165:12
  73: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/future/mod.rs:80:19
  74: <core::pin::Pin<P> as core::future::future::Future>::poll
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/future/future.rs:119:9
  75: read_avro::main::{{closure}}
             at ./src/main.rs:22:19
  76: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/future/mod.rs:80:19
  77: tokio::park::thread::CachedParkThread::block_on::{{closure}}
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/park/thread.rs:263:54
  78: tokio::coop::with_budget::{{closure}}
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/coop.rs:102:9
  79: std::thread::local::LocalKey<T>::try_with
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/std/src/thread/local.rs:399:16
  80: std::thread::local::LocalKey<T>::with
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/std/src/thread/local.rs:375:9
  81: tokio::coop::with_budget
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/coop.rs:95:5
  82: tokio::coop::budget
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/coop.rs:72:5
  83: tokio::park::thread::CachedParkThread::block_on
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/park/thread.rs:263:31
  84: tokio::runtime::enter::Enter::block_on
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/runtime/enter.rs:151:13
  85: tokio::runtime::thread_pool::ThreadPool::block_on
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/runtime/thread_pool/mod.rs:73:9
  86: tokio::runtime::Runtime::block_on
             at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/runtime/mod.rs:477:43
  87: read_avro::main
             at ./src/main.rs:27:5
  88: core::ops::function::FnOnce::call_once
             at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/ops/function.rs:227:5

The contents of the avro look like:

$ java -jar ~/avro-tools-1.11.0.jar tojson test.avro 
{"status":1,"snapshot_id":{"long":4486040319718220013},"data_file":{"file_path":"/warehouse/db/table/data/ts_hour=2022-02-07-17/00015-195989-291e52f5-b4b3-45a8-9bfd-da0bf8277229-00001.parquet","file_format":"PARQUET","partition":{"ts_hour":{"int":456737}},"record_count":1,"file_size_in_bytes":1254,"block_size_in_bytes":67108864,"column_sizes":{"array":[{"key":1,"value":57},{"key":2,"value":63},{"key":3,"value":53},{"key":4,"value":53}]},"value_counts":{"array":[{"key":1,"value":1},{"key":2,"value":1},{"key":3,"value":1},{"key":4,"value":1}]},"null_value_counts":{"array":[{"key":1,"value":0},{"key":2,"value":0},{"key":3,"value":0},{"key":4,"value":0}]},"nan_value_counts":{"array":[{"key":3,"value":0},{"key":4,"value":0}]},"lower_bounds":{"array":[{"key":1,"value":"À¹¬Dq×\u0005\u0000"},{"key":2,"value":"8e1a0dc0a079"},{"key":3,"value":"��!A"},{"key":4,"value":"M\u000BºB"}]},"upper_bounds":{"array":[{"key":1,"value":"À¹¬Dq×\u0005\u0000"},{"key":2,"value":"8e1a0dc0a079"},{"key":3,"value":"��!A"},{"key":4,"value":"M\u000BºB"}]},"key_metadata":null,"split_offsets":{"array":[4]},"sort_order_id":{"int":0}}}

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions