Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement a generator based on Arbitrary for compatibility #108

Merged
merged 22 commits into from
Feb 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,14 @@ jobs:
strategy:
fail-fast: false
matrix:
rust: [1.57.0, stable, beta, nightly]
rust: [1.57.0, 1.63.0, stable, beta, nightly]
os: [ubuntu-latest, macOS-latest]
# TODO add honggfuzz back
test: [unit-tests, libfuzzer, afl, examples-tests]
test: [unit-tests, unit-tests-no-1.57, libfuzzer, afl, examples-tests]
sanitizer: [NONE]
exclude:
- rust: 1.57.0
test: unit-tests-no-1.57
# honggfuzz is broken on macOS
- os: macos-latest
test: honggfuzz
Expand Down
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ test_example:
--nocapture $(TEST_THREADS)

unit-tests:
@cargo test
cargo test

unit-tests-no-1.57:
cargo test --features arbitrary

test_fuzzers: $(FUZZERS)

Expand Down
1 change: 1 addition & 0 deletions bolero-generator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ std = ["alloc", "either/use_std"]
alloc = ["rand_core/alloc"]

[dependencies]
arbitrary = { version = "1.0", optional = true }
Ekleog marked this conversation as resolved.
Show resolved Hide resolved
bolero-generator-derive = { version = "0.8.0", path = "../bolero-generator-derive" }
either = { version = "1.5", default-features = false, optional = true }
rand_core = { version = "^0.6", default-features = false }
Expand Down
49 changes: 49 additions & 0 deletions bolero-generator/src/arbitrary.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
use crate::{Driver, ValueGenerator};
use arbitrary::Unstructured;
use core::marker::PhantomData;

pub use arbitrary::Arbitrary;

/// A [`ValueGenerator`] that produces values of `T` through `T`'s [`Arbitrary`] implementation.
///
/// The `PhantomData<T>` field only records the target type; no `T` is stored.
pub struct ArbitraryGenerator<T>(PhantomData<T>);

impl<T> ValueGenerator for ArbitraryGenerator<T>
where
    T: for<'a> Arbitrary<'a>,
{
    type Output = T;

    /// Generates a `T` by letting `T::arbitrary` read from bytes supplied by `driver`.
    ///
    /// The acceptable slice length is taken from `T::size_hint(0)`; a missing upper bound
    /// is treated as `usize::MAX`. Returns `None` when the driver cannot supply bytes or
    /// when `T::arbitrary` reports an error.
    fn generate<D: Driver>(&self, driver: &mut D) -> Option<T> {
        let (lower, upper) = T::size_hint(0);
        let len = lower..=upper.unwrap_or(usize::MAX);

        driver.gen_from_bytes(len, |bytes| {
            let mut unstructured = Unstructured::new(bytes);
            let value = T::arbitrary(&mut unstructured).ok()?;
            // Whatever `Unstructured` did not hand out is still available to the driver,
            // so only the difference is reported as consumed.
            let consumed = bytes.len() - unstructured.len();
            Some((consumed, value))
        })
    }
}

/// Creates a generator that builds `T` values using `T`'s [`Arbitrary`] implementation.
///
/// `T` must implement `Arbitrary<'a>` for every lifetime `'a`.
#[inline]
pub fn gen_arbitrary<T>() -> ArbitraryGenerator<T>
where
T: for<'a> Arbitrary<'a>,
{
ArbitraryGenerator(PhantomData)
}

#[cfg(test)]
mod tests {
// Runs the `generator_test!` harness on the arbitrary-backed generator for a tuple of integers.
#[test]
fn tuple() {
let _ = generator_test!(gen_arbitrary::<(u8, u32, u64)>());
}

// Runs the `generator_test!` harness on the arbitrary-backed generator for a `Vec<usize>`.
#[test]
fn vec() {
let _ = generator_test!(gen_arbitrary::<Vec<usize>>());
}
}
20 changes: 19 additions & 1 deletion bolero-generator/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::{
uniform::{FillBytes, Uniform},
TypeGenerator,
};
use core::ops::Bound;
use core::ops::{Bound, RangeInclusive};
use rand_core::RngCore;

#[macro_use]
Expand Down Expand Up @@ -48,6 +48,24 @@ pub trait Driver: Sized {
gen_method!(gen_char, char);

fn gen_bool(&mut self, probability: Option<f32>) -> Option<bool>;

/// Generates a value by handing a slice of this driver's bytes to `gen`.
///
/// `len` is the range of acceptable lengths for the slice passed to `gen`. Its lower bound
/// is the minimum number of bytes needed to generate a value at all. Its upper bound
/// should be large enough that every `T` can be generated by `gen` from a slice of that length.
///
/// `gen` is the function that actually does the generation. It takes the byte slice as input
/// and returns `None` if not enough bytes were provided to build a `T` (this can happen even
/// with a slice of maximum length, but should happen as rarely as possible). Otherwise it
/// returns `Some((consumed, value))`, where `consumed` is the number of bytes that were
/// actually consumed from the slice.
///
/// Note that `gen` may be called multiple times with increasing slice lengths, e.g. if the
/// driver is in forced mode.
fn gen_from_bytes<Gen, T>(&mut self, len: RangeInclusive<usize>, gen: Gen) -> Option<T>
where
Gen: FnMut(&[u8]) -> Option<(usize, T)>;
}

/// Byte exhaustion strategy for the driver
Expand Down
35 changes: 19 additions & 16 deletions bolero-generator/src/driver/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,30 +31,33 @@ impl<'a> FillBytes for ByteSliceDriver<'a> {
}

#[inline]
fn fill_bytes(&mut self, bytes: &mut [u8]) -> Option<()> {
fn peek_bytes(&mut self, offset: usize, bytes: &mut [u8]) -> Option<()> {
match self.mode {
DriverMode::Forced => {
let offset = self.input.len().min(bytes.len());
let (current, remaining) = self.input.split_at(offset);
let (bytes_to_fill, bytes_to_zero) = bytes.split_at_mut(offset);
bytes_to_fill.copy_from_slice(current);
for byte in bytes_to_zero.iter_mut() {
*byte = 0;
DriverMode::Direct => {
if (offset + bytes.len()) > self.input.len() {
None
} else {
bytes.copy_from_slice(&self.input[offset..(offset + bytes.len())]);
Some(())
}
self.input = remaining;
Some(())
}
DriverMode::Direct => {
if bytes.len() > self.input.len() {
return None;
DriverMode::Forced => {
if offset < self.input.len() {
let copy_len = std::cmp::min(bytes.len(), self.input.len() - offset);
bytes[..copy_len].copy_from_slice(&self.input[offset..(offset + copy_len)]);
bytes[copy_len..].fill(0);
} else {
bytes.fill(0);
}
let (current, remaining) = self.input.split_at(bytes.len());
bytes.copy_from_slice(current);
self.input = remaining;
Some(())
}
}
}

/// Drops the first `consumed` bytes of the remaining input.
///
/// The count is clamped to the input length, so over-consuming drains the input
/// instead of panicking on an out-of-range slice.
#[inline]
fn consume_bytes(&mut self, consumed: usize) {
    let skip = consumed.min(self.input.len());
    self.input = &self.input[skip..];
}
}

impl<'a> Driver for ByteSliceDriver<'a> {
Expand Down
74 changes: 74 additions & 0 deletions bolero-generator/src/driver/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,5 +90,79 @@ macro_rules! gen_from_bytes {
Some(value < (u8::MAX / 2))
}
}

fn gen_from_bytes<Gen, T>(&mut self, len: RangeInclusive<usize>, mut gen: Gen) -> Option<T>
where
Gen: FnMut(&[u8]) -> Option<(usize, T)>,
{
// Even attempting an alloc of more than 0x10000000000 bytes makes asan crash.
// LibFuzzer limits memory to 2G (by default) and try_reserve() does not fail in oom situations then.
// With all the above, limit memory allocations to 1M at a time here.
const NONSENSICAL_SIZE: usize = 1024 * 1024;
const ABUSIVE_SIZE: usize = 1024;
const MIN_INCREASE: usize = 32;

match self.mode() {
DriverMode::Direct => {
let len = match (len.start(), len.end()) {
(s, e) if s == e => *s,
(s, e) => self.gen_usize(Bound::Included(s), Bound::Included(e))?,
};
if len >= NONSENSICAL_SIZE {
return None;
}
let mut data = Vec::new();
if let Err(_) = data.try_reserve(len) {
return None;
}
data.resize(len, 0);
self.peek_bytes(0, &mut data)?;
match gen(&data) {
None => None,
Some((consumed, res)) => {
self.consume_bytes(consumed);
Some(res)
}
}
}
DriverMode::Forced => {
let init_len = len.start()
+ self.gen_usize(
Bound::Included(&0),
Bound::Included(&std::cmp::min(ABUSIVE_SIZE, len.end() - len.start())),
)?;
let mut data = vec![0; init_len];
self.peek_bytes(0, &mut data)?;
loop {
match gen(&data) {
Some((consumed, res)) => {
self.consume_bytes(consumed);
return Some(res);
}
None => {
let max_additional_size = std::cmp::min(
ABUSIVE_SIZE,
len.end().saturating_sub(data.len()),
);
if max_additional_size == 0 {
self.consume_bytes(data.len());
return None; // we actually tried feeding the max amount of data already
}
let additional_size = self.gen_usize(
Bound::Included(&std::cmp::min(
MIN_INCREASE,
max_additional_size,
)),
Bound::Included(&max_additional_size),
)?;
let previous_len = data.len();
data.resize(data.len() + additional_size, 0);
self.peek_bytes(previous_len, &mut data[previous_len..]);
}
}
}
}
}
}
};
}
10 changes: 8 additions & 2 deletions bolero-generator/src/driver/rng.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@ impl<R: RngCore> FillBytes for DirectRng<R> {
}

#[inline]
fn fill_bytes(&mut self, bytes: &mut [u8]) -> Option<()> {
fn peek_bytes(&mut self, _offset: usize, bytes: &mut [u8]) -> Option<()> {
RngCore::try_fill_bytes(&mut self.0, bytes).ok()
}

// Intentionally a no-op: `peek_bytes` draws bytes straight from the wrapped RNG,
// so there is no buffered input to advance here.
#[inline]
fn consume_bytes(&mut self, _consumed: usize) {}
}

impl<R: RngCore> Driver for DirectRng<R> {
Expand All @@ -47,7 +50,7 @@ impl<R: RngCore> FillBytes for ForcedRng<R> {
}

#[inline]
fn fill_bytes(&mut self, bytes: &mut [u8]) -> Option<()> {
fn peek_bytes(&mut self, _offset: usize, bytes: &mut [u8]) -> Option<()> {
if RngCore::try_fill_bytes(&mut self.0, bytes).is_err() {
// if the rng fails to fill the remaining bytes, then we just start returning 0s
for byte in bytes.iter_mut() {
Expand All @@ -56,6 +59,9 @@ impl<R: RngCore> FillBytes for ForcedRng<R> {
}
Some(())
}

// Intentionally a no-op: `peek_bytes` draws bytes straight from the wrapped RNG,
// so there is no buffered input to advance here.
#[inline]
fn consume_bytes(&mut self, _consumed: usize) {}
}

impl<R: RngCore> Driver for ForcedRng<R> {
Expand Down
10 changes: 9 additions & 1 deletion bolero-generator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ extern crate std;
pub mod std_generators;

pub use bolero_generator_derive::*;
#[cfg(feature = "arbitrary")]
pub mod arbitrary;

pub mod array;
pub mod atomic;
Expand All @@ -43,7 +45,10 @@ pub mod result;
pub mod time;
pub mod tuple;

pub use driver::Driver;
#[cfg(feature = "arbitrary")]
pub use crate::arbitrary::gen_arbitrary;

pub use crate::driver::Driver;

/// Generate a value for a given type
pub trait TypeGenerator: Sized {
Expand Down Expand Up @@ -256,4 +261,7 @@ pub mod prelude {
one_of::{one_of, one_value_of, OneOfExt, OneValueOfExt},
TypeGenerator, TypeGeneratorWithParams, ValueGenerator,
};

#[cfg(feature = "arbitrary")]
pub use crate::gen_arbitrary;
}
10 changes: 9 additions & 1 deletion bolero-generator/src/uniform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,15 @@ pub trait Uniform: Sized {

pub trait FillBytes {
fn mode(&self) -> DriverMode;
fn fill_bytes(&mut self, bytes: &mut [u8]) -> Option<()>;

/// Fills `bytes` with input located `offset` bytes ahead, without marking it as consumed.
///
/// Returns `None` when the request cannot be satisfied. RNG-backed implementations
/// ignore `offset` and draw fresh bytes instead.
fn peek_bytes(&mut self, offset: usize, bytes: &mut [u8]) -> Option<()>;
/// Marks `consumed` bytes of input as used so that later reads start after them.
fn consume_bytes(&mut self, consumed: usize);

/// Fills `bytes` from the start of the input and consumes exactly `bytes.len()` bytes.
///
/// Returns `None`, without consuming anything, if `peek_bytes` fails.
fn fill_bytes(&mut self, bytes: &mut [u8]) -> Option<()> {
self.peek_bytes(0, bytes)?;
self.consume_bytes(bytes.len());
Some(())
}
}

macro_rules! uniform_int {
Expand Down
21 changes: 21 additions & 0 deletions bolero-kani/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ mod kani {
// no-op
let _ = cond;
}

// NOTE(review): appears to be a placeholder mirroring the call shape of `kani::vec::any_vec`
// for builds without the real `kani` crate — confirm against the enclosing module's cfg.
pub mod vec {
// Unimplemented: calling this panics via `todo!()`.
pub fn any_vec<T, const N: usize>() -> Vec<T> {
todo!()
}
}
}

#[doc(hidden)]
Expand Down Expand Up @@ -143,6 +149,21 @@ pub mod lib {
fn gen_bool(&mut self, _probability: Option<f32>) -> Option<bool> {
Some(kani::any())
}

/// Generates a `T` from a nondeterministic byte vector obtained from `kani::vec::any_vec`.
///
/// The vector's length is constrained to `len`, and generation is assumed to succeed,
/// so paths on which `gen` returns `None` are excluded via `kani::assume`.
/// The consumed-byte count reported by `gen` is discarded.
fn gen_from_bytes<Gen, T>(
    &mut self,
    len: std::ops::RangeInclusive<usize>,
    mut gen: Gen,
) -> Option<T>
where
    Gen: FnMut(&[u8]) -> Option<(usize, T)>,
{
    let candidate = kani::vec::any_vec::<u8, 256>();
    kani::assume(len.contains(&candidate.len()));

    let generated = gen(&candidate).map(|(_consumed, value)| value);
    kani::assume(generated.is_some());
    generated
}
}
}

Expand Down
1 change: 1 addition & 0 deletions bolero/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ readme = "../README.md"
default = ["std"]
std = ["alloc", "bolero-generator/std"]
alloc = ["bolero-generator/alloc"]
arbitrary = ["bolero-generator/arbitrary"]

[dependencies]
bolero-engine = { version = "0.8", path = "../bolero-engine" }
Expand Down