-
Notifications
You must be signed in to change notification settings - Fork 151
Open
Description
It seems difficult to efficiently map an ArrayVec, i.e. something that's length preserving. For code like this:
#[inline]
fn map_f(v: &u8) -> f64 {
f64::from(*v)
}
pub fn map_arr_iter_ix(a: arrayvec::ArrayVec<u8, 24>) -> arrayvec::ArrayVec<f64, 24> {
arrayvec::ArrayVec::from_iter((0..a.len()).map(|ix| map_f(&a[ix])))
}
pub fn map_arr_iter_value(a: arrayvec::ArrayVec<u8, 24>) -> arrayvec::ArrayVec<f64, 24> {
arrayvec::ArrayVec::from_iter(a.iter().map(map_f))
}It generates ASM that includes capacity checks, panic code etc. The ASM is similar enough in both cases that I only include one.
.section .text.models::map_arr_iter_value,"ax",@progbits
.globl models::map_arr_iter_value
.p2align 4
.type models::map_arr_iter_value,@function
models::map_arr_iter_value:
// /home/shana/programming/engine/models/src/lib.rs : 379
pub fn map_arr_iter_value(a: arrayvec::ArrayVec<u8, 24>) -> arrayvec::ArrayVec<f64, 24> {
.cfi_startproc
push rbp
.cfi_def_cfa_offset 16
push r15
.cfi_def_cfa_offset 24
push r14
.cfi_def_cfa_offset 32
push r13
.cfi_def_cfa_offset 40
push r12
.cfi_def_cfa_offset 48
push rbx
.cfi_def_cfa_offset 56
sub rsp, 200
.cfi_def_cfa_offset 256
.cfi_offset rbx, -56
.cfi_offset r12, -48
.cfi_offset r13, -40
.cfi_offset r14, -32
.cfi_offset r15, -24
.cfi_offset rbp, -16
mov rbx, rdi
// /home/shana/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrayvec-0.7.6/src/arrayvec.rs : 114
pub const fn len(&self) -> usize { self.len as usize }
mov r12d, dword ptr [rsi]
test r12, r12
// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/slice/iter/macros.rs : 179
if ptr == crate::intrinsics::transmute::<$ptr, NonNull<T>>(end_or_len) {
je .LBB962_1
mov r14, rsi
// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/ptr/mut_ptr.rs : 961
unsafe { intrinsics::offset(self, count) }
add r14, 4
// /home/shana/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrayvec-0.7.6/src/arrayvec.rs : 1101
if ptr == end_ptr && CHECK { extend_panic(); }
mov r13, r12
shl r13, 3
xor ebp, ebp
jmp .LBB962_3
.p2align 4
.LBB962_5:
vcvtsi2sd xmm0, xmm15, r15d
inc r14
// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/ptr/mod.rs : 1940
intrinsics::write_via_move(dst, src)
vmovsd qword ptr [rsp + rbp + 8], xmm0
add rbp, 8
// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/ptr/non_null.rs : 1683
self.as_ptr() == other.as_ptr()
cmp r13, rbp
// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/slice/iter/macros.rs : 179
if ptr == crate::intrinsics::transmute::<$ptr, NonNull<T>>(end_or_len) {
je .LBB962_6
.LBB962_3:
// /nix/store/6ljk7nx7g00ypxdcixl2lbwn7kcmd4jw-rust-default-1.91.0/lib/rustlib/src/rust/library/core/src/ops/function.rs : 310
(*self).call_mut(args)
movzx r15d, byte ptr [r14]
// /home/shana/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrayvec-0.7.6/src/arrayvec.rs : 1101
if ptr == end_ptr && CHECK { extend_panic(); }
cmp rbp, 192
jne .LBB962_5
lea rdi, [rip + .Lanon.acc18acdac17d7dd77cba6a0167c7603.34]
call qword ptr [rip + arrayvec::arrayvec::extend_panic@GOTPCREL]
jmp .LBB962_5
.LBB962_1:
xor r12d, r12d
.LBB962_6:
// /home/shana/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrayvec-0.7.6/src/arrayvec.rs : 1095
**self_len = len as LenUint;
mov dword ptr [rsp], r12d
// /home/shana/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/arrayvec-0.7.6/src/arrayvec.rs : 1148
array
mov rax, qword ptr [rsp + 192]
mov qword ptr [rbx + 192], rax
vmovups ymm0, ymmword ptr [rsp + 160]
vmovups ymmword ptr [rbx + 160], ymm0
vmovups ymm0, ymmword ptr [rsp + 32]
vmovups ymm1, ymmword ptr [rsp + 64]
vmovups ymm2, ymmword ptr [rsp + 96]
vmovups ymm3, ymmword ptr [rsp + 128]
vmovups ymmword ptr [rbx + 128], ymm3
vmovups ymmword ptr [rbx + 96], ymm2
vmovups ymmword ptr [rbx + 64], ymm1
vmovups ymmword ptr [rbx + 32], ymm0
mov eax, dword ptr [rsp]
mov dword ptr [rbx], eax
vmovups xmm0, xmmword ptr [rsp + 4]
vmovups xmmword ptr [rbx + 4], xmm0
mov rax, qword ptr [rsp + 20]
mov qword ptr [rbx + 20], rax
mov eax, dword ptr [rsp + 28]
mov dword ptr [rbx + 28], eax
// /home/shana/programming/engine/models/src/lib.rs : 381
}
mov rax, rbx
add rsp, 200
.cfi_def_cfa_offset 56
pop rbx
.cfi_def_cfa_offset 48
pop r12
.cfi_def_cfa_offset 40
pop r13
.cfi_def_cfa_offset 32
pop r14
.cfi_def_cfa_offset 24
pop r15
.cfi_def_cfa_offset 16
pop rbp
.cfi_def_cfa_offset 8
vzeroupper
retAfter playing around with various options, I have come up with this as the best option
/// Map from one arrayvec (references) into a new one. If the target capacity is
/// lower than the original one, the mapping will only happen on the part that
/// fits (rather than panicking).
pub fn map_ref<T, U, const N: usize, const M: usize>(
a: &arrayvec::ArrayVec<T, N>,
mut f: impl FnMut(&T) -> U,
) -> arrayvec::ArrayVec<U, M> {
// const M: usize = N;
let mut arr = arrayvec::ArrayVec::<U, M>::new_const();
// The .take(N) makes the compiler realise that slice must have at most `N`
// elements. You can actually achieve the same thing with an
// `assert!(slice.len() <= N)` and it'll compile that away. Of course _we_
// know it's true but if you omit both the `.take()` and the assert! then it
// suddenly forgets to keep track of the fact and adds panic checks in the
// try_push below.
//
// Further, at no loss of expressivity nor runtime cost at all, we can
// generate a different capacity ArrayVec and simply push as much as we can
// into it: all it takes is changing `.take(N)` into `.take(N.min(M))` and
// it'll generate sane code in all the cases.
let iter = a.as_slice().iter().take(N.min(M));
for v in iter {
// unwrap never fails because .take() ensures we're within capacity
// bounds. Compiler realises this and omits and checks here. If we were
// using no-panic create, we could put it on this function to ensure
// this...
arr.try_push(f(v)).unwrap();
}
arr
}This generates ASM without any panics. But it feels like we should just have method for this.
I threw in support for different sized arrayvecs because it was trivial and we have an actual use-case for this where we map an arrayvec except the last element, so this is prefect).
Metadata
Metadata
Assignees
Labels
No labels