From 841f8cf927abca92ff78312954d780abb918dc4f Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Thu, 8 Apr 2021 00:55:46 +0200 Subject: [PATCH 1/2] Add test ruling out eager expansion of custom arguments. Closes #1221. Implements the approach suggested in that issue. --- exercises/practice/forth/.meta/config.json | 3 +- .../practice/forth/tests/alloc-attack.rs | 96 +++++++++++++++++++ 2 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 exercises/practice/forth/tests/alloc-attack.rs diff --git a/exercises/practice/forth/.meta/config.json b/exercises/practice/forth/.meta/config.json index 43c90eb56..723784031 100644 --- a/exercises/practice/forth/.meta/config.json +++ b/exercises/practice/forth/.meta/config.json @@ -8,7 +8,8 @@ "src/lib.rs" ], "test": [ - "tests/forth.rs" + "tests/forth.rs", + "tests/alloc-attack.rs" ], "example": [ ".meta/example.rs" diff --git a/exercises/practice/forth/tests/alloc-attack.rs b/exercises/practice/forth/tests/alloc-attack.rs new file mode 100644 index 000000000..861dfe733 --- /dev/null +++ b/exercises/practice/forth/tests/alloc-attack.rs @@ -0,0 +1,96 @@ +//! Certain implementations of Forth naively and eagerly expand sub-definitions +//! into superior definitions at definition time. While this appears at first to +//! be a clever trick to solve the problem imposed by the +//! `can_use_different_words_with_the_same_name` test, it has its own problem: it +//! can be attacked to cause an OOM condition in an arbitrary machine without +//! requiring that any instructions are actually executed; only definitions. +//! +//! This test doesn't go quite that far, but it does demonstrate the attack and +//! require that implementations don't just do the naive thing. + +use forth::Forth; + +#[test] +#[ignore] +fn alloc_attack() { + let mut f = Forth::new(); + f.eval(": a 0 drop ;").unwrap(); + f.eval(": b a a ;").unwrap(); + f.eval(": c b b ;").unwrap(); + f.eval(": d c c ;").unwrap(); + f.eval(": e d d ;").unwrap(); + f.eval(": f e e ;").unwrap(); + f.eval(": g f f ;").unwrap(); + f.eval(": h g g ;").unwrap(); + f.eval(": i h h ;").unwrap(); + f.eval(": j i i ;").unwrap(); + f.eval(": k j j ;").unwrap(); + f.eval(": l k k ;").unwrap(); + f.eval(": m l l ;").unwrap(); + f.eval(": n m m ;").unwrap(); + f.eval(": o n n ;").unwrap(); + f.eval(": p o o ;").unwrap(); + f.eval(": q p p ;").unwrap(); + f.eval(": r q q ;").unwrap(); + f.eval(": s r r ;").unwrap(); + f.eval(": t s s ;").unwrap(); + f.eval(": u t t ;").unwrap(); + f.eval(": v u u ;").unwrap(); + f.eval(": w v v ;").unwrap(); + f.eval(": x w w ;").unwrap(); + f.eval(": y x x ;").unwrap(); + f.eval(": z y y ;").unwrap(); + + // On an implementation with eager expansion of sub-custom-words, + // `z`'s definition is 2**26 items long. Assuming the implementation + // has compacted each instruction into a single byte, that takes up + // over 50 Mb of memory. Less efficient implementations will require + // more. + // + // This shouldn't crash or hang anyone's machine, but it's at least a + // testable proposition. A good implementation shouldn't be doing eager + // stack expansion, so it should require much less than that. + + // Sanity check--few implementations should fail here. + assert!(f.stack().is_empty()); + + // We want to allow some wiggle-room, as this exercise isn't really meant + // to test the ability to micro-optimize. Still, let's ensure that the total + // allocated memory is substantially less than expected from a naive solution. + // + // A megabyte seems like a reasonable number to use. + assert!(GLOBAL_ALLOCATOR.get_bytes_allocated() < 1024 * 1024); +} + +// From this point forward, we define the custom `GLOBAL_ALLOCATOR` used in the test above. + +use std::alloc::{GlobalAlloc, Layout, System as SystemAllocator}; +use std::sync::atomic::{AtomicU64, Ordering}; + +/// This allocator wraps the default allocator, and keeps track of how much +/// memory has been allocated. +/// +/// Inspired by https://www.reddit.com/r/rust/comments/8z83wc/is_there_any_way_to_benchmark_memory_usage_in_rust/e2h4dp9 +struct TrackingAllocator(A, AtomicU64); + +unsafe impl GlobalAlloc for TrackingAllocator { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + self.1.fetch_add(layout.size() as u64, Ordering::SeqCst); + self.0.alloc(layout) + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + self.0.dealloc(ptr, layout); + self.1.fetch_sub(layout.size() as u64, Ordering::SeqCst); + } +} + +impl TrackingAllocator { + fn get_bytes_allocated(&self) -> u64 { + self.1.load(Ordering::SeqCst) + } +} + +#[global_allocator] +static GLOBAL_ALLOCATOR: TrackingAllocator = + TrackingAllocator(SystemAllocator, AtomicU64::new(0)); From 741d46970a7afda5a7bcd54c73a1719edce50540 Mon Sep 17 00:00:00 2001 From: Peter Goodspeed-Niklaus Date: Thu, 8 Apr 2021 01:21:10 +0200 Subject: [PATCH 2/2] update example to pass new test --- exercises/practice/forth/.meta/example.rs | 303 ++++++++++------------ 1 file changed, 136 insertions(+), 167 deletions(-) diff --git a/exercises/practice/forth/.meta/example.rs b/exercises/practice/forth/.meta/example.rs index 24d4c0529..20983a9f9 100644 --- a/exercises/practice/forth/.meta/example.rs +++ b/exercises/practice/forth/.meta/example.rs @@ -1,22 +1,12 @@ +//! This solution authored by exercism user `2deth`: +//! https://exercism.io/mentor/solutions/b6f9f69b03df4b889c9930960cb4a358?iteration_idx=8 + use std::collections::HashMap; -use std::collections::VecDeque; -use std::str::FromStr; -pub type Value = i32; +pub type Value = i16; pub type ForthResult = Result<(), Error>; -type StackResult = Result; - -type Stack = Vec; -type Code = VecDeque; -type Definitions = HashMap; - -pub struct Forth { - code: Code, - defs: Definitions, - stack: Stack, -} -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Eq)] pub enum Error { DivisionByZero, StackUnderflow, @@ -24,181 +14,160 @@ pub enum Error { InvalidWord, } -#[derive(Debug, Clone)] -enum Term { - Number(Value), - Word(String), - StartDefinition, - EndDefinition, +#[derive(Default)] +pub struct Forth { + stack: Vec, + words: HashMap, + definitions: Vec>, } -impl FromStr for Term { - type Err = (); - - fn from_str(s: &str) -> Result { - match s { - ":" => Ok(Term::StartDefinition), - ";" => Ok(Term::EndDefinition), - _ => Err(()), - } - .or_else(|_| Value::from_str(s).map(Term::Number)) - .or_else(|_| Ok(Term::Word(s.to_ascii_lowercase()))) - } +// Instead of `usize`, to reduce `size_of::`, we use `u16`. This puts a +// maximum limit on the number of custom words this implementation can handle, +// but as a practical matter, it's plenty. +type Id = u16; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum Op { + Add, + Sub, + Mul, + Div, + Dup, + Drop, + Swap, + Over, + Push(Value), + Call(Id), } impl Forth { - pub fn new() -> Forth { - Forth { - code: Code::new(), - defs: HashMap::new(), - stack: Stack::new(), - } + pub fn new() -> Self { + Self::default() } pub fn stack(&self) -> &[Value] { - self.stack.as_slice() + &self.stack } pub fn eval(&mut self, input: &str) -> ForthResult { - let mut new_code = Forth::into_code(input); - self.code.append(&mut new_code); - self.run() - } - - fn run(&mut self) -> ForthResult { - while let Some(term) = self.code.pop_front() { - self.step_term(term)?; + #[derive(Debug, PartialEq, Eq)] + enum Mode { + Execution, + Word, + Definition(String, Vec), } - Forth::ok() - } + let mut mode = Mode::Execution; - fn step_term(&mut self, term: Term) -> ForthResult { - match term { - Term::Number(value) => self.push(value), - Term::Word(word) => self.step_word(&word), - Term::StartDefinition => self.store_definition(), - Term::EndDefinition => { - eprintln!("`;` without preceding `:`"); - Err(Error::InvalidWord) - } - } - } - - fn step_word(&mut self, word: &str) -> ForthResult { - if let Some(def) = self.defs.get(word) { - let mut def = def.clone(); - while let Some(term) = def.pop_front() { - self.step_term(term)?; + for token in input.to_uppercase().split_whitespace() { + match mode { + Mode::Execution => { + if token == ":" { + mode = Mode::Word; + } else { + eval_op( + parse_op(token, &self.words)?, + &mut self.stack, + &self.words, + &self.definitions, + )?; + } + } + Mode::Word => { + if token.parse::().is_ok() { + return Err(Error::InvalidWord); + } + mode = Mode::Definition(token.into(), Vec::new()); + } + Mode::Definition(_, ref mut definition) => { + if token == ";" { + if let Mode::Definition(word, definition) = + std::mem::replace(&mut mode, Mode::Execution) + { + self.definitions.push(definition); + self.words.insert(word, self.definitions.len() as Id - 1); + } else { + unreachable!(); + } + } else { + definition.push(parse_op(token, &self.words)?); + } + } } - Self::ok() - } else { - self.step_built_in(word) } - } - fn divide((a, b): (Value, Value)) -> StackResult { - a.checked_div(b).ok_or_else(|| { - eprintln!("Cannot divide {} by {}", a, b); - Error::DivisionByZero - }) + (mode == Mode::Execution) + .then(|| ()) + .ok_or(Error::InvalidWord) } +} - fn step_built_in(&mut self, word: &str) -> ForthResult { - match word { - "+" => self.bin_op(|(a, b)| Ok(a + b)), - "-" => self.bin_op(|(a, b)| Ok(a - b)), - "*" => self.bin_op(|(a, b)| Ok(a * b)), - "/" => self.bin_op(Self::divide), - "dup" => self.pop().and_then(|a| self.push(a).and(self.push(a))), - "drop" => self.pop().and(Forth::ok()), - "swap" => self - .pop_two() - .and_then(|(a, b)| self.push(b).and(self.push(a))), - "over" => self - .pop_two() - .and_then(|(a, b)| self.push(a).and(self.push(b)).and(self.push(a))), - _ => { - eprintln!("{} is undefined", word); - Err(Error::UnknownWord) - } +fn parse_op(token: &str, words: &HashMap) -> Result { + Ok(if let Some(id) = words.get(token) { + Op::Call(*id) + } else { + match token { + "+" => Op::Add, + "-" => Op::Sub, + "*" => Op::Mul, + "/" => Op::Div, + "DUP" => Op::Dup, + "DROP" => Op::Drop, + "SWAP" => Op::Swap, + "OVER" => Op::Over, + _ => Op::Push(token.parse::().map_err(|_| Error::UnknownWord)?), } - } - - fn store_definition(&mut self) -> ForthResult { - let mut def = Code::new(); + }) +} - loop { - match self.code.pop_front() { - Some(Term::EndDefinition) => break, - Some(term) => def.push_back(term), - None => return Err(Error::InvalidWord), - } +fn eval_op( + op: Op, + stack: &mut Vec, + words: &HashMap, + definitions: &Vec>, +) -> ForthResult { + let mut pop = || stack.pop().ok_or(Error::StackUnderflow); + match op { + Op::Add => { + let (rhs, lhs) = (pop()?, pop()?); + stack.push(lhs + rhs); } - - if let Some(Term::Word(name)) = def.pop_front() { - self.store_word(name, def) - } else { - Err(Error::InvalidWord) + Op::Sub => { + let (rhs, lhs) = (pop()?, pop()?); + stack.push(lhs - rhs); } - } - - fn push(&mut self, value: Value) -> ForthResult { - self.stack.push(value); - Forth::ok() - } - - fn pop(&mut self) -> StackResult { - self.stack.pop().ok_or_else(|| { - eprintln!("Stack underflow"); - Error::StackUnderflow - }) - } - - fn pop_two(&mut self) -> StackResult<(Value, Value)> { - self.pop().and_then(|b| self.pop().map(|a| (a, b))) - } - - fn bin_op(&mut self, op: F) -> ForthResult - where - F: FnOnce((Value, Value)) -> StackResult, - { - self.pop_two() - .and_then(op) - .and_then(|value| self.push(value)) - } - - fn store_word(&mut self, name: String, code: Code) -> ForthResult { - let mut resolved_def = Code::new(); - for t in code.iter() { - match t { - Term::Number(_) => resolved_def.push_back(t.clone()), - Term::Word(s) => { - if let Some(cs) = self.defs.get(s) { - resolved_def.append(&mut cs.clone()); - } else { - resolved_def.push_back(t.clone()); - } - } - _ => { - eprintln!("Nested definition in {}", name); - return Err(Error::InvalidWord); - } + Op::Mul => { + let (rhs, lhs) = (pop()?, pop()?); + stack.push(lhs * rhs); + } + Op::Div => { + let (rhs, lhs) = (pop()?, pop()?); + let quotient = lhs.checked_div(rhs).ok_or(Error::DivisionByZero)?; + stack.push(quotient); + } + Op::Dup => { + let top = *stack.last().ok_or(Error::StackUnderflow)?; + stack.push(top); + } + Op::Drop => { + pop()?; + } + Op::Swap => { + let (top, below) = (pop()?, pop()?); + stack.push(top); + stack.push(below); + } + Op::Over => { + let below = *stack.iter().nth_back(1).ok_or(Error::StackUnderflow)?; + stack.push(below); + } + Op::Push(num) => { + stack.push(num); + } + Op::Call(fn_id) => { + for op in &definitions[fn_id as usize] { + eval_op(*op, stack, words, definitions)?; } } - self.defs.insert(name, resolved_def); - Forth::ok() - } - - fn into_code(input: &str) -> Code { - input - .split(|c: char| c.is_whitespace() || c.is_control()) - .map(Term::from_str) - .filter(Result::is_ok) - .map(Result::unwrap) - .collect() - } - - fn ok() -> ForthResult { - Ok(()) } + Ok(()) }