-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Description
Describe the bug
When I use "read_avro()" or "register_avro()" with DataFusion 6.0 (built with feature=avro) on one particular Avro file, I consistently get a panic. Other Avro files with different schemas work fine.
thread 'main' panicked at 'expected struct got None', /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:771:37
The same Avro file is decoded correctly by avro-tools-1.11.0.jar.
To Reproduce
I have simplified the logic to the following test program: https://gist.github.com/joshuarobinson/413536d5affd751eb9d8958a970e8b04
I'm also attaching a link to the 6 KB Avro file that triggers the problem.
Expected behavior
The contents of the Avro file should be printed using basic DataFusion "df.collect.show"-style logic, without a panic.
Additional context
For what it's worth, the problematic Avro file is part of Apache Iceberg metadata.
Stacktrace:
stack backtrace:
0: rust_begin_unwind
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/std/src/panicking.rs:517:5
1: core::panicking::panic_fmt
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/panicking.rs:101:14
2: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::build_struct_array::{{closure}}::{{closure}}
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:771:37
3: core::iter::adapters::map::map_fold::{{closure}}
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:84:28
4: core::iter::adapters::map::map_fold::{{closure}}
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:84:21
5: <core::iter::adapters::enumerate::Enumerate<I> as core::iter::traits::iterator::Iterator>::fold::enumerate::{{closure}}
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/enumerate.rs:106:27
6: core::iter::traits::iterator::Iterator::fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:2170:21
7: <core::iter::adapters::enumerate::Enumerate<I> as core::iter::traits::iterator::Iterator>::fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/enumerate.rs:112:9
8: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:124:9
9: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:124:9
10: core::iter::traits::iterator::Iterator::for_each
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:733:9
11: <alloc::vec::Vec<T,A> as alloc::vec::spec_extend::SpecExtend<T,I>>::spec_extend
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_extend.rs:40:17
12: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_from_iter_nested.rs:56:9
13: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_from_iter.rs:33:9
14: <alloc::vec::Vec<T> as core::iter::traits::collect::FromIterator<T>>::from_iter
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/mod.rs:2486:9
15: core::iter::traits::iterator::Iterator::collect
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1745:9
16: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::build_struct_array::{{closure}}
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:762:43
17: core::iter::adapters::map::map_try_fold::{{closure}}
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:91:28
18: core::iter::adapters::filter::filter_try_fold::{{closure}}
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/filter.rs:44:44
19: core::iter::traits::iterator::Iterator::try_fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1994:21
20: <core::iter::adapters::filter::Filter<I,P> as core::iter::traits::iterator::Iterator>::try_fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/filter.rs:93:9
21: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::try_fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:117:9
22: <core::iter::adapters::ResultShunt<I,E> as core::iter::traits::iterator::Iterator>::try_fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/mod.rs:177:9
23: core::iter::traits::iterator::Iterator::find
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:2381:9
24: <core::iter::adapters::ResultShunt<I,E> as core::iter::traits::iterator::Iterator>::next
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/mod.rs:159:9
25: alloc::vec::Vec<T,A>::extend_desugared
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/mod.rs:2583:35
26: <alloc::vec::Vec<T,A> as alloc::vec::spec_extend::SpecExtend<T,I>>::spec_extend
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_extend.rs:18:9
27: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_from_iter_nested.rs:37:9
28: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/spec_from_iter.rs:33:9
29: <alloc::vec::Vec<T> as core::iter::traits::collect::FromIterator<T>>::from_iter
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/vec/mod.rs:2486:9
30: core::iter::traits::iterator::Iterator::collect
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1745:9
31: <core::result::Result<V,E> as core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter::{{closure}}
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/result.rs:1887:53
32: core::iter::adapters::process_results
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/mod.rs:148:17
33: <core::result::Result<V,E> as core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/result.rs:1887:9
34: core::iter::traits::iterator::Iterator::collect
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1745:9
35: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::build_struct_array
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:610:50
36: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::build_struct_array::{{closure}}
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:776:29
37: core::iter::adapters::map::map_try_fold::{{closure}}
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:91:28
38: core::iter::adapters::filter::filter_try_fold::{{closure}}
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/filter.rs:44:44
39: core::iter::traits::iterator::Iterator::try_fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1994:21
40: <core::iter::adapters::filter::Filter<I,P> as core::iter::traits::iterator::Iterator>::try_fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/filter.rs:93:9
41: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::try_fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/map.rs:117:9
42: <core::iter::adapters::ResultShunt<I,E> as core::iter::traits::iterator::Iterator>::try_fold
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/adapters/mod.rs:177:9
43: core::iter::traits::iterator::Iterator::find
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:2381:9
[... frames 44-54 are missing from this copy of the backtrace ...]
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/iter/traits/iterator.rs:1745:9
55: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::build_struct_array
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:610:50
56: datafusion::avro_to_arrow::arrow_array_reader::AvroArrowArrayReader<R>::next_batch
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/arrow_array_reader.rs:119:13
57: datafusion::avro_to_arrow::reader::Reader<R>::next
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/reader.rs:159:9
58: <datafusion::avro_to_arrow::reader::Reader<R> as core::iter::traits::iterator::Iterator>::next
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/avro_to_arrow/reader.rs:167:9
59: <alloc::boxed::Box<I,A> as core::iter::traits::iterator::Iterator>::next
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/alloc/src/boxed.rs:1570:9
60: datafusion::physical_plan::file_format::file_stream::FileStream<F>::next_batch
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/file_format/file_stream.rs:116:15
61: datafusion::physical_plan::file_format::file_stream::FileStream<F>::next_batch::{{closure}}
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/file_format/file_stream.rs:130:29
62: core::result::Result<T,E>::and_then
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/result.rs:966:22
63: datafusion::physical_plan::file_format::file_stream::FileStream<F>::next_batch
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/file_format/file_stream.rs:124:21
64: <datafusion::physical_plan::file_format::file_stream::FileStream<F> as futures_core::stream::Stream>::poll_next
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/file_format/file_stream.rs:150:40
65: <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-core-0.3.21/src/stream.rs:120:9
66: <S as futures_core::stream::TryStream>::try_poll_next
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-core-0.3.21/src/stream.rs:196:9
67: <futures_util::stream::try_stream::try_collect::TryCollect<St,C> as core::future::future::Future>::poll
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/futures-util-0.3.21/src/stream/try_stream/try_collect.rs:46:26
68: datafusion::physical_plan::common::collect::{{closure}}
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/common.rs:79:5
69: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/future/mod.rs:80:19
70: datafusion::physical_plan::collect::{{closure}}
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/physical_plan/mod.rs:315:5
71: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/future/mod.rs:80:19
72: <datafusion::execution::dataframe_impl::DataFrameImpl as datafusion::dataframe::DataFrame>::collect::{{closure}}
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/datafusion-6.0.0/src/execution/dataframe_impl.rs:165:12
73: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/future/mod.rs:80:19
74: <core::pin::Pin<P> as core::future::future::Future>::poll
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/future/future.rs:119:9
75: read_avro::main::{{closure}}
at ./src/main.rs:22:19
76: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/future/mod.rs:80:19
77: tokio::park::thread::CachedParkThread::block_on::{{closure}}
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/park/thread.rs:263:54
78: tokio::coop::with_budget::{{closure}}
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/coop.rs:102:9
79: std::thread::local::LocalKey<T>::try_with
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/std/src/thread/local.rs:399:16
80: std::thread::local::LocalKey<T>::with
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/std/src/thread/local.rs:375:9
81: tokio::coop::with_budget
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/coop.rs:95:5
82: tokio::coop::budget
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/coop.rs:72:5
83: tokio::park::thread::CachedParkThread::block_on
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/park/thread.rs:263:31
84: tokio::runtime::enter::Enter::block_on
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/runtime/enter.rs:151:13
85: tokio::runtime::thread_pool::ThreadPool::block_on
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/runtime/thread_pool/mod.rs:73:9
86: tokio::runtime::Runtime::block_on
at /home/ir/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.16.1/src/runtime/mod.rs:477:43
87: read_avro::main
at ./src/main.rs:27:5
88: core::ops::function::FnOnce::call_once
at /rustc/59eed8a2aac0230a8b53e89d4e99d55912ba6b35/library/core/src/ops/function.rs:227:5
The contents of the Avro file look like this:
$ java -jar ~/avro-tools-1.11.0.jar tojson test.avro
{"status":1,"snapshot_id":{"long":4486040319718220013},"data_file":{"file_path":"/warehouse/db/table/data/ts_hour=2022-02-07-17/00015-195989-291e52f5-b4b3-45a8-9bfd-da0bf8277229-00001.parquet","file_format":"PARQUET","partition":{"ts_hour":{"int":456737}},"record_count":1,"file_size_in_bytes":1254,"block_size_in_bytes":67108864,"column_sizes":{"array":[{"key":1,"value":57},{"key":2,"value":63},{"key":3,"value":53},{"key":4,"value":53}]},"value_counts":{"array":[{"key":1,"value":1},{"key":2,"value":1},{"key":3,"value":1},{"key":4,"value":1}]},"null_value_counts":{"array":[{"key":1,"value":0},{"key":2,"value":0},{"key":3,"value":0},{"key":4,"value":0}]},"nan_value_counts":{"array":[{"key":3,"value":0},{"key":4,"value":0}]},"lower_bounds":{"array":[{"key":1,"value":"À¹¬Dq×\u0005\u0000"},{"key":2,"value":"8e1a0dc0a079"},{"key":3,"value":"��!A"},{"key":4,"value":"M\u000BºB"}]},"upper_bounds":{"array":[{"key":1,"value":"À¹¬Dq×\u0005\u0000"},{"key":2,"value":"8e1a0dc0a079"},{"key":3,"value":"��!A"},{"key":4,"value":"M\u000BºB"}]},"key_metadata":null,"split_offsets":{"array":[4]},"sort_order_id":{"int":0}}}