Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix cyclic JSON.stringify / primitive conversion stack overflows #777

Merged
merged 13 commits into from
Oct 5, 2020
30 changes: 29 additions & 1 deletion boa/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::{
},
class::{Class, ClassBuilder},
exec::Interpreter,
object::{GcObject, Object, ObjectData, PROTOTYPE},
object::{GcObject, Object, ObjectData, RecursionLimiter, PROTOTYPE},
property::{Property, PropertyKey},
realm::Realm,
syntax::{
Expand Down Expand Up @@ -370,6 +370,18 @@ impl Context {

/// Converts an object to a primitive.
///
/// Diverges from the spec to prevent a stack overflow when the object is recursive.
/// For example,
/// ```javascript
/// let a = [1];
/// a[1] = a;
/// console.log(a.toString()); // We print "1,"
/// ```
/// The spec doesn't mention what to do in this situation, but a naive implementation
/// would overflow the stack recursively calling `toString()`. We follow v8 and SpiderMonkey
/// instead by returning a default value for the given `hint` -- either `0.` or `""`.
/// Example in v8: https://repl.it/repls/IvoryCircularCertification#index.js
///
/// More information:
/// - [ECMAScript][spec]
///
Expand All @@ -380,10 +392,26 @@ impl Context {
hint: PreferredType,
) -> Result<Value> {
// 1. Assert: Type(O) is Object.
// TODO: #776 this should also accept Type::Function
debug_assert!(o.get_type() == Type::Object);
// 2. Assert: Type(hint) is String and its value is either "string" or "number".
debug_assert!(hint == PreferredType::String || hint == PreferredType::Number);

// Diverge from the spec here to make sure we aren't going to overflow the stack by converting
// a recursive structure
// We can follow v8 & SpiderMonkey's lead and return a default value for the hint in this situation
// (see https://repl.it/repls/IvoryCircularCertification#index.js)
let obj = o.as_gc_object().unwrap(); // UNWRAP: Asserted type above
vgel marked this conversation as resolved.
Show resolved Hide resolved
let recursion_limiter = RecursionLimiter::new(&obj);
if recursion_limiter.live {
// we're in a recursive object, bail
return Ok(match hint {
PreferredType::Number => Value::number(0.),
PreferredType::String => Value::string(""),
vgel marked this conversation as resolved.
Show resolved Hide resolved
PreferredType::Default => unreachable!(), // checked hint type above
});
}

// 3. If hint is "string", then
// a. Let methodNames be « "toString", "valueOf" ».
// 4. Else,
Expand Down
98 changes: 64 additions & 34 deletions boa/src/object/gcobject.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::{
use gc::{Finalize, Gc, GcCell, GcCellRef, GcCellRefMut, Trace};
use std::{
cell::RefCell,
collections::HashSet,
collections::HashMap,
error::Error,
fmt::{self, Debug, Display},
result::Result as StdResult,
Expand Down Expand Up @@ -302,69 +302,99 @@ impl Display for BorrowMutError {

impl Error for BorrowMutError {}

/// Prevents infinite recursion during `Debug::fmt`.
#[derive(Debug)]
struct RecursionLimiter {
/// If this was the first `GcObject` in the tree.
free: bool,
/// If this is the first time a specific `GcObject` has been seen.
first: bool,
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum RecursionValueState {
/// This value is "live": there's an active RecursionLimiter that hasn't been dropped.
Live,
/// This value has been seen before, but the recursion limiter has been dropped.
/// For example:
/// ```javascript
/// let b = [];
/// JSON.stringify([ // Create a recursion limiter for the root here
/// b, // state for b's &GcObject here is None
/// b, // state for b's &GcObject here is Visited
/// ]);
/// ```
Visited,
}

impl Clone for RecursionLimiter {
fn clone(&self) -> Self {
Self {
// Cloning this value would result in a premature free.
free: false,
// Cloning this vlaue would result in a value being written multiple times.
first: false,
}
}
/// Prevents infinite recursion during `Debug::fmt`, `JSON.stringify`, and other conversions.
/// This uses a thread local, so is not safe to use where the object graph will be traversed by
/// multiple threads!
#[derive(Debug)]
pub struct RecursionLimiter {
/// If this was the first `GcObject` in the tree.
top_level: bool,
/// The ptr being kept in the HashSet, so we can delete it when we drop.
ptr: usize,
/// If this GcObject has been visited before in the graph, but not in the current branch.
pub visited: bool,
/// If this GcObject has been visited in the current branch of the graph.
pub live: bool,
}

impl Drop for RecursionLimiter {
fn drop(&mut self) {
// Typically, calling hs.remove(ptr) for "first" objects would be the correct choice here. This would allow the
// same object to appear multiple times in the output (provided it does not appear under itself recursively).
// However, the JS object hierarchy involves quite a bit of repitition, and the sheer amount of data makes
// understanding the Debug output impossible; limiting the usefulness of it.
//
// Instead, the entire hashset is emptied at by the first GcObject involved. This means that objects will appear
// at most once, throughout the graph, hopefully making things a bit clearer.
if self.free {
Self::VISITED.with(|hs| hs.borrow_mut().clear());
if self.top_level {
// When the top level of the graph is dropped, we can free the entire map for the next traversal.
Self::SEEN.with(|hm| hm.borrow_mut().clear());
} else if !self.live {
// This was the first RL for this object to become live, so it's no longer live now that it's dropped.
Self::SEEN.with(|hm| {
hm.borrow_mut()
.insert(self.ptr, RecursionValueState::Visited)
});
}
}
}

impl RecursionLimiter {
thread_local! {
/// The list of pointers to `GcObject` that have been visited during the current `Debug::fmt` graph.
static VISITED: RefCell<HashSet<usize>> = RefCell::new(HashSet::new());
/// The map of pointers to `GcObject` that have been visited during the current `Debug::fmt` graph,
/// and the current state of their RecursionLimiter (dropped or live -- see `RecursionValueState`)
static SEEN: RefCell<HashMap<usize, RecursionValueState>> = RefCell::new(HashMap::new());
}

/// Determines if the specified `GcObject` has been visited, and returns a struct that will free it when dropped.
///
/// This is done by maintaining a thread-local hashset containing the pointers of `GcObject` values that have been
/// visited. The first `GcObject` visited will clear the hashset, while any others will check if they are contained
/// by the hashset.
fn new(o: &GcObject) -> Self {
pub fn new(o: &GcObject) -> Self {
// We shouldn't have to worry too much about this being moved during Debug::fmt.
let ptr = (o.as_ref() as *const _) as usize;
let (free, first) = Self::VISITED.with(|hs| {
let mut hs = hs.borrow_mut();
(hs.is_empty(), hs.insert(ptr))
let (top_level, visited, live) = Self::SEEN.with(|hm| {
let mut hm = hm.borrow_mut();
let top_level = hm.is_empty();
let old_state = hm.insert(ptr, RecursionValueState::Live);

(
top_level,
old_state == Some(RecursionValueState::Visited),
old_state == Some(RecursionValueState::Live),
)
});

Self { free, first }
Self {
top_level,
ptr,
visited,
live,
}
}
}

impl Debug for GcObject {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
let limiter = RecursionLimiter::new(&self);

if limiter.first {
// Typically, using `!limiter.live` would be good enough here.
// However, the JS object hierarchy involves quite a bit of repitition, and the sheer amount of data makes
// understanding the Debug output impossible; limiting the usefulness of it.
//
// Instead, we check if the object has appeared before in the entire graph. This means that objects will appear
// at most once, hopefully making things a bit clearer.
if !limiter.visited && !limiter.live {
f.debug_tuple("GcObject").field(&self.0).finish()
} else {
f.write_str("{ ... }")
Expand Down
2 changes: 1 addition & 1 deletion boa/src/object/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ mod gcobject;
mod internal_methods;
mod iter;

pub use gcobject::{GcObject, Ref, RefMut};
pub use gcobject::{GcObject, RecursionLimiter, Ref, RefMut};
pub use iter::*;

/// Static `prototype`, usually set on constructors as a key to point to their respective prototype object.
Expand Down
60 changes: 33 additions & 27 deletions boa/src/value/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::{
number::{f64_to_int32, f64_to_uint32},
BigInt, Number,
},
object::{GcObject, Object, ObjectData, PROTOTYPE},
object::{GcObject, Object, ObjectData, RecursionLimiter, PROTOTYPE},
property::{Attribute, Property, PropertyKey},
BoaProfiler, Context, Result,
};
Expand Down Expand Up @@ -228,32 +228,7 @@ impl Value {
match *self {
Self::Null => Ok(JSONValue::Null),
Self::Boolean(b) => Ok(JSONValue::Bool(b)),
Self::Object(ref obj) => {
if obj.borrow().is_array() {
let mut keys: Vec<u32> = obj.borrow().index_property_keys().cloned().collect();
keys.sort();
let mut arr: Vec<JSONValue> = Vec::with_capacity(keys.len());
for key in keys {
let value = self.get_field(key);
if value.is_undefined() || value.is_function() || value.is_symbol() {
arr.push(JSONValue::Null);
} else {
arr.push(value.to_json(interpreter)?);
}
}
Ok(JSONValue::Array(arr))
} else {
let mut new_obj = Map::new();
for k in obj.borrow().keys() {
let key = k.clone();
let value = self.get_field(k.to_string());
if !value.is_undefined() && !value.is_function() && !value.is_symbol() {
new_obj.insert(key.to_string(), value.to_json(interpreter)?);
}
}
Ok(JSONValue::Object(new_obj))
}
}
Self::Object(ref obj) => self.object_to_json(obj, interpreter),
Self::String(ref str) => Ok(JSONValue::String(str.to_string())),
Self::Rational(num) => Ok(JSONNumber::from_f64(num)
.map(JSONValue::Number)
Expand All @@ -268,6 +243,37 @@ impl Value {
}
}

/// Converts an object to JSON, checking for reference cycles and throwing a TypeError if one is found
fn object_to_json(&self, obj: &GcObject, interpreter: &mut Context) -> Result<JSONValue> {
vgel marked this conversation as resolved.
Show resolved Hide resolved
let rec_limiter = RecursionLimiter::new(&obj);
if rec_limiter.live {
Err(interpreter.construct_type_error("cyclic object value"))
} else if obj.borrow().is_array() {
let mut keys: Vec<u32> = obj.borrow().index_property_keys().cloned().collect();
keys.sort();
let mut arr: Vec<JSONValue> = Vec::with_capacity(keys.len());
for key in keys {
let value = self.get_field(key);
if value.is_undefined() || value.is_function() || value.is_symbol() {
arr.push(JSONValue::Null);
} else {
arr.push(value.to_json(interpreter)?);
}
}
Ok(JSONValue::Array(arr))
} else {
let mut new_obj = Map::new();
for k in obj.borrow().keys() {
let key = k.clone();
let value = self.get_field(k.to_string());
if !value.is_undefined() && !value.is_function() && !value.is_symbol() {
new_obj.insert(key.to_string(), value.to_json(interpreter)?);
}
}
Ok(JSONValue::Object(new_obj))
}
}

/// This will tell us if we can exten an object or not, not properly implemented yet
///
/// For now always returns true.
Expand Down
105 changes: 105 additions & 0 deletions boa/src/value/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,111 @@ toString: {
);
}

mod cyclic_conversions {
use super::*;

#[test]
fn to_json_cyclic() {
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
JSON.stringify(a)
"#;

assert_eq!(
forward(&mut engine, src),
r#"Uncaught "TypeError": "cyclic object value""#,
);
}

#[test]
fn to_json_noncyclic() {
let mut engine = Context::new();
let src = r#"
let b = [];
let a = [b, b];
JSON.stringify(a)
"#;

let value = forward_val(&mut engine, src).unwrap();
let result = value.as_string().unwrap();
assert_eq!(result, "[[],[]]",);
}

// These tests don't throw errors. Instead we mirror Chrome / Firefox behavior for these conversions
#[test]
fn to_string_cyclic() {
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
a.toString()
"#;

let value = forward_val(&mut engine, src).unwrap();
let result = value.as_string().unwrap();
assert_eq!(result, "");
}

#[test]
fn to_number_cyclic() {
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
+a
"#;

let value = forward_val(&mut engine, src).unwrap();
let result = value.as_number().unwrap();
assert_eq!(result, 0.);
}

#[test]
fn to_boolean_cyclic() {
// this already worked before the mitigation, but we don't want to cause a regression
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
!!a
"#;

let value = forward_val(&mut engine, src).unwrap();
// There isn't an as_boolean function for some reason?
assert_eq!(value, Value::Boolean(true));
}

#[test]
fn to_bigint_cyclic() {
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
BigInt(a)
"#;

let value = forward_val(&mut engine, src).unwrap();
let result = value.as_bigint().unwrap().to_f64();
assert_eq!(result, 0.);
}

#[test]
fn to_u32_cyclic() {
let mut engine = Context::new();
let src = r#"
let a = [];
a[0] = a;
a | 0
"#;

let value = forward_val(&mut engine, src).unwrap();
let result = value.as_number().unwrap();
assert_eq!(result, 0.);
}
}

mod abstract_relational_comparison {
use super::*;
macro_rules! check_comparison {
Expand Down