
Commit 3ae48bd

Merge pull request #14 from vorner/inline-array-align
Fix alignment issues of InlineArray
2 parents 40aa74b + 9a2e789

1 file changed: +117 -11

Diff for: src/inline_array/mod.rs

@@ -11,7 +11,6 @@ use core::cmp::Ordering;
 use core::fmt::{Debug, Error, Formatter};
 use core::hash::{Hash, Hasher};
 use core::iter::FromIterator;
-use core::marker::PhantomData;
 use core::mem::{self, MaybeUninit};
 use core::ops::{Deref, DerefMut};
 use core::ptr;
@@ -25,7 +24,7 @@ pub use self::iter::{Drain, Iter};
 /// This works like a vector, but allocated on the stack (and thus marginally
 /// faster than `Vec`), with the allocated space exactly matching the size of
 /// the given type `T`. The vector consists of a `usize` tracking its current
-/// length, followed by zero or more elements of type `A`. The capacity is thus
+/// length and zero or more elements of type `A`. The capacity is thus
 /// `( size_of::<T>() - size_of::<usize>() ) / size_of::<A>()`. This could lead
 /// to situations where the capacity is zero, if `size_of::<A>()` is greater
 /// than `size_of::<T>() - size_of::<usize>()`, which is not an error and
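
As a quick sanity check on the capacity formula in the doc comment above, here is a small standalone sketch (my own illustration, not part of this commit), assuming a 64-bit target where size_of::<usize>() is 8 and using a made-up instantiation with T = [u64; 32] and A = u8:

use core::mem::size_of;

fn main() {
    let host = size_of::<[u64; 32]>(); // 256 bytes of inline storage (stands in for T)
    let header = size_of::<usize>();   // 8 bytes reserved for the length field
    let element = size_of::<u8>();     // 1 byte per element (stands in for A)
    // ( size_of::<T>() - size_of::<usize>() ) / size_of::<A>()
    assert_eq!((host - header) / element, 248);
}
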
@@ -72,9 +71,37 @@ pub use self::iter::{Drain, Iter};
 ///
 /// Both of these will have the same size, and we can swap the `Inline` case out
 /// with the `Full` case once the `InlineArray` runs out of capacity.
+#[repr(C)]
 pub struct InlineArray<A, T> {
+    // Alignment tricks
+    //
+    // We need both the usize header and the data to be properly aligned in memory. We do a few
+    // tricks to handle that.
+    //
+    // * An alignment is always a power of 2. Therefore, given a pair of alignments, one is always
+    //   a multiple of the other (one way or the other).
+    // * A struct is aligned to at least the max alignment of each of its fields.
+    // * A repr(C) struct lays its fields out in order and pushes each as close to the previous one
+    //   as allowed by alignment.
+    //
+    // By placing two "fake" fields first that have 0 size but carry an alignment, we make sure that
+    // all 3 start at the beginning of the struct and that all of them are aligned to their maximum
+    // alignment.
+    //
+    // Furthermore, because we don't know whether usize or A has the bigger alignment, we decide on
+    // a case-by-case basis whether the header or the elements go first. By placing the one with the
+    // higher alignment requirement first, we align that one, and the other is aligned
+    // "automatically" when placed just after it.
+    //
+    // To the best of our knowledge, this is all guaranteed by the compiler. But just to make sure,
+    // we have a bunch of asserts in the constructor to check; as these are invariants enforced by
+    // the compiler, it should be trivial for it to remove the checks, so they are free (if we are
+    // correct) or will save us (if we are not).
+    //
+    // Additionally, the [A; 0] serves as a form of PhantomData.
+    _header_align: [usize; 0],
+    _data_align: [A; 0],
     data: MaybeUninit<T>,
-    phantom: PhantomData<A>,
 }
 
 const fn capacity(host_size: usize, header_size: usize, element_size: usize) -> usize {
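
The comment block above relies on three layout rules: alignments are powers of two, a struct is aligned to at least the strictest alignment among its fields, and repr(C) lays fields out in declaration order. The following self-contained sketch of the same trick (my own, outside the crate, with a made-up 16-byte-aligned type Big standing in for a high-alignment A) shows how the two zero-sized fields buy alignment without adding any size:

use core::mem::{align_of, size_of, MaybeUninit};

#[repr(align(16))]
#[allow(dead_code)]
struct Big(u128);

// Same shape as the patched InlineArray: two zero-sized "alignment" fields, then the payload.
#[repr(C)]
#[allow(dead_code)]
struct Demo<A, T> {
    _header_align: [usize; 0], // zero bytes, but demands usize alignment
    _data_align: [A; 0],       // zero bytes, but demands A's alignment
    data: MaybeUninit<T>,
}

fn main() {
    // The struct inherits the strictest alignment of its zero-sized fields...
    assert!(align_of::<Demo<Big, [u8; 64]>>() >= align_of::<Big>());
    assert!(align_of::<Demo<Big, [u8; 64]>>() >= align_of::<usize>());
    // ...while its size is still just the size of the payload.
    assert_eq!(size_of::<Demo<Big, [u8; 64]>>(), size_of::<[u8; 64]>());
}
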
@@ -89,32 +116,67 @@ impl<A, T> InlineArray<A, T> {
     const HOST_SIZE: usize = mem::size_of::<T>();
     const ELEMENT_SIZE: usize = mem::size_of::<A>();
     const HEADER_SIZE: usize = mem::size_of::<usize>();
+    // Do we place the header before the elements, or the other way around?
+    const HEADER_FIRST: bool = mem::align_of::<usize>() >= mem::align_of::<A>();
+    // Note: one of the following is always 0.
+    // How many usizes to skip before the first element?
+    const ELEMENT_SKIP: usize = Self::HEADER_FIRST as usize;
+    // How many elements to skip before the header?
+    const HEADER_SKIP: usize = Self::CAPACITY * (1 - Self::ELEMENT_SKIP);
 
     /// The maximum number of elements the `InlineArray` can hold.
     pub const CAPACITY: usize = capacity(Self::HOST_SIZE, Self::HEADER_SIZE, Self::ELEMENT_SIZE);
 
     #[inline]
     #[must_use]
     unsafe fn len_const(&self) -> *const usize {
-        (&self.data) as *const _ as *const usize
+        let ptr = self
+            .data
+            .as_ptr()
+            .cast::<A>()
+            .add(Self::HEADER_SKIP)
+            .cast::<usize>();
+        debug_assert!(ptr as usize % mem::align_of::<usize>() == 0);
+        ptr
     }
 
     #[inline]
     #[must_use]
     pub(crate) unsafe fn len_mut(&mut self) -> *mut usize {
-        (&mut self.data) as *mut _ as *mut usize
+        let ptr = self
+            .data
+            .as_mut_ptr()
+            .cast::<A>()
+            .add(Self::HEADER_SKIP)
+            .cast::<usize>();
+        debug_assert!(ptr as usize % mem::align_of::<usize>() == 0);
+        ptr
     }
 
     #[inline]
     #[must_use]
     pub(crate) unsafe fn data(&self) -> *const A {
-        self.len_const().add(1) as *const _ as *const A
+        let ptr = self
+            .data
+            .as_ptr()
+            .cast::<usize>()
+            .add(Self::ELEMENT_SKIP)
+            .cast::<A>();
+        debug_assert!(ptr as usize % mem::align_of::<A>() == 0);
+        ptr
     }
 
     #[inline]
     #[must_use]
     unsafe fn data_mut(&mut self) -> *mut A {
-        self.len_mut().add(1) as *mut _ as *mut A
+        let ptr = self
+            .data
+            .as_mut_ptr()
+            .cast::<usize>()
+            .add(Self::ELEMENT_SKIP)
+            .cast::<A>();
+        debug_assert!(ptr as usize % mem::align_of::<A>() == 0);
+        ptr
     }
 
     #[inline]
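
To see how the HEADER_FIRST / ELEMENT_SKIP / HEADER_SKIP constants added above play out, here is a sketch (my own, mirroring those expressions outside the crate, and assuming a 64-bit target) that works the numbers for two element types:

use core::mem::{align_of, size_of};

#[repr(align(256))]
#[allow(dead_code)]
struct BigAlign(usize);

// (header_first, element_skip, header_skip) for element type A in a host of `host_size` bytes.
fn layout<A>(host_size: usize) -> (bool, usize, usize) {
    let capacity = (host_size - size_of::<usize>()) / size_of::<A>();
    let header_first = align_of::<usize>() >= align_of::<A>();
    let element_skip = header_first as usize;        // usizes to skip before the first element
    let header_skip = capacity * (1 - element_skip); // elements to skip before the header
    (header_first, element_skip, header_skip)
}

fn main() {
    // u8 elements: usize has the stricter alignment, so the header goes first and the
    // elements start one usize into the buffer.
    assert_eq!(layout::<u8>(64), (true, 1, 0));
    // 256-byte-aligned elements: they go first, and the header lands right after the last
    // element slot, i.e. CAPACITY (here 7) elements into the buffer.
    assert_eq!(layout::<BigAlign>(2048), (false, 0, 7));
}
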
@@ -164,12 +226,36 @@ impl<A, T> InlineArray<A, T> {
     #[inline]
     #[must_use]
     pub fn new() -> Self {
-        debug_assert!(Self::HOST_SIZE > Self::HEADER_SIZE);
+        assert!(Self::HOST_SIZE > Self::HEADER_SIZE);
         let mut self_ = Self {
+            _header_align: [],
+            _data_align: [],
             data: MaybeUninit::uninit(),
-            phantom: PhantomData,
         };
-        unsafe { *self_.len_mut() = 0 }
+        // Sanity check our assumptions about what is guaranteed by the compiler. If we are right,
+        // these should completely optimize out of the resulting binary.
+        assert_eq!(
+            &self_ as *const _ as usize,
+            self_.data.as_ptr() as usize,
+            "Padding at the start of struct",
+        );
+        assert_eq!(mem::size_of::<Self>(), mem::size_of::<T>());
+        assert_eq!(
+            self_.data.as_ptr() as usize % mem::align_of::<usize>(),
+            0,
+            "Unaligned header"
+        );
+        assert_eq!(
+            self_.data.as_ptr() as usize % mem::align_of::<A>(),
+            0,
+            "Unaligned elements"
+        );
+        assert!(
+            mem::align_of::<A>() % mem::align_of::<usize>() == 0
+                || mem::align_of::<usize>() % mem::align_of::<A>() == 0
+        );
+        assert!(Self::ELEMENT_SKIP == 0 || Self::HEADER_SKIP == 0);
+        unsafe { ptr::write(self_.len_mut(), 0usize) };
         self_
     }
 
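A side note on the last changed line of this hunk: the length is now initialized with ptr::write instead of a plain assignment through the raw pointer. This is a general Rust point rather than anything specific to this crate; a minimal sketch of why that primitive suits uninitialized storage:

use core::mem::MaybeUninit;
use core::ptr;

fn main() {
    let mut slot: MaybeUninit<usize> = MaybeUninit::uninit();
    unsafe {
        // `*slot.as_mut_ptr() = 0` would also be fine for a plain usize, but for a type with a
        // Drop impl an assignment would first drop the old, uninitialized value, which is
        // undefined behaviour. `ptr::write` never reads or drops the destination.
        ptr::write(slot.as_mut_ptr(), 0usize);
        assert_eq!(slot.assume_init(), 0);
    }
}
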
@@ -270,7 +356,7 @@ impl<A, T> InlineArray<A, T> {
 
     #[inline]
     unsafe fn drop_contents(&mut self) {
-        ptr::drop_in_place::<[A]>(&mut **self)
+        ptr::drop_in_place::<[A]>(&mut **self) // uses DerefMut
     }
 
     /// Discard the contents of the array.
@@ -516,4 +602,24 @@ mod test {
         assert_eq!(65536, chunk.len());
         assert_eq!(Some(()), chunk.pop());
     }
+
+    #[test]
+    fn low_align_base() {
+        let mut chunk: InlineArray<String, [u8; 512]> = InlineArray::new();
+        chunk.push("Hello".to_owned());
+        assert_eq!(chunk[0], "Hello");
+    }
+
+    #[test]
+    fn big_align_elem() {
+        #[repr(align(256))]
+        struct BigAlign(usize);
+
+        let mut chunk: InlineArray<BigAlign, [usize; 256]> = InlineArray::new();
+        chunk.push(BigAlign(42));
+        assert_eq!(
+            chunk.get(0).unwrap() as *const _ as usize % mem::align_of::<BigAlign>(),
+            0
+        );
+    }
 }
