From 985760f5609125f1ca4797c1df9ba69ab01aab85 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Fri, 29 Jul 2022 06:09:18 +0100 Subject: [PATCH] Avoid boxing in PrimitiveDictionaryBuilder (#2216) --- .../builder/primitive_dictionary_builder.rs | 50 +++++++++++++------ 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/arrow/src/array/builder/primitive_dictionary_builder.rs b/arrow/src/array/builder/primitive_dictionary_builder.rs index b74bf9a43b2..71656f985d1 100644 --- a/arrow/src/array/builder/primitive_dictionary_builder.rs +++ b/arrow/src/array/builder/primitive_dictionary_builder.rs @@ -16,6 +16,7 @@ // under the License. use std::any::Any; +use std::collections::hash_map::Entry; use std::collections::HashMap; use std::sync::Arc; @@ -26,6 +27,26 @@ use crate::error::{ArrowError, Result}; use super::ArrayBuilder; use super::PrimitiveBuilder; +/// Wraps a type implementing `ToByteSlice` implementing `Hash` and `Eq` for it +/// +/// This is necessary to handle types such as f32, which don't natively implement these +#[derive(Debug)] +struct Value(T); + +impl std::hash::Hash for Value { + fn hash(&self, state: &mut H) { + self.0.to_byte_slice().hash(state) + } +} + +impl PartialEq for Value { + fn eq(&self, other: &Self) -> bool { + self.0.to_byte_slice().eq(other.0.to_byte_slice()) + } +} + +impl Eq for Value {} + /// Array builder for `DictionaryArray`. For example to map a set of byte indices /// to f32 values. Note that the use of a `HashMap` here will not scale to very large /// arrays or result in an ordered dictionary. @@ -71,7 +92,7 @@ where { keys_builder: PrimitiveBuilder, values_builder: PrimitiveBuilder, - map: HashMap, K::Native>, + map: HashMap, K::Native>, } impl PrimitiveDictionaryBuilder @@ -138,19 +159,20 @@ where /// value is appended to the values array. #[inline] pub fn append(&mut self, value: V::Native) -> Result { - if let Some(&key) = self.map.get(value.to_byte_slice()) { - // Append existing value. - self.keys_builder.append_value(key); - Ok(key) - } else { - // Append new value. - let key = K::Native::from_usize(self.values_builder.len()) - .ok_or(ArrowError::DictionaryKeyOverflowError)?; - self.values_builder.append_value(value); - self.keys_builder.append_value(key as K::Native); - self.map.insert(value.to_byte_slice().into(), key); - Ok(key) - } + let key = match self.map.entry(Value(value)) { + Entry::Vacant(vacant) => { + // Append new value. + let key = K::Native::from_usize(self.values_builder.len()) + .ok_or(ArrowError::DictionaryKeyOverflowError)?; + self.values_builder.append_value(value); + vacant.insert(key); + key + } + Entry::Occupied(o) => *o.get(), + }; + + self.keys_builder.append_value(key); + Ok(key) } #[inline]