Skip to content

Commit

Permalink
Fix use-after-free when invoking implicitly_convert_to_string
Browse files Browse the repository at this point in the history
`implicitly_convert_to_string` returns a byte slice view into an
underlying `mrb_value` backed by an `RString`. This function is marked
`unsafe` because it returns a borrowed value that *may be garbage
collected*.

This commit audits all call sites to check if the VM is invoked between
implicit conversion and final use, which may cause the `RString` to be
garbage collected.

This bug was found when debug logging paths into `Memory` load path
loader which showed garbage bytes.

This commit adds additional safety documentation to
`implicitly_convert_to_string` and adds safety comments to all unsafe
blocks that invoke this function.
  • Loading branch information
lopopolo committed Mar 21, 2021
1 parent 2ed18a1 commit 692a45d
Show file tree
Hide file tree
Showing 9 changed files with 102 additions and 24 deletions.
4 changes: 4 additions & 0 deletions artichoke-backend/src/convert/implicit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,10 @@ pub fn implicitly_convert_to_int(interp: &mut Artichoke, value: Value) -> Result
///
/// Callers must ensure that `value` does not outlive the given interpreter.
///
/// If a garbage collection can possibly run between calling this function and
/// using the returned slice, callers should convert the slice to an owned byte
/// vec.
///
/// [`Symbol`]: crate::extn::core::symbol::Symbol
pub unsafe fn implicitly_convert_to_string<'a>(
interp: &mut Artichoke,
Expand Down
5 changes: 5 additions & 0 deletions artichoke-backend/src/extn/core/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,11 @@ impl Array {
out.push(formatted.into_bytes());
}
_ => {
// Safety:
//
// `s` is converted to an owned byte vec immediately before
// any intervening operaitons on the VM which may cause a
// garbage collection of the `RString` that backs `value`.
if let Ok(s) = unsafe { implicitly_convert_to_string(interp, &mut value) } {
out.push(s.to_vec());
} else {
Expand Down
7 changes: 6 additions & 1 deletion artichoke-backend/src/extn/core/array/trampoline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,13 @@ pub fn plus(interp: &mut Artichoke, mut ary: Value, mut other: Value) -> Result<

pub fn mul(interp: &mut Artichoke, mut ary: Value, mut joiner: Value) -> Result<Value, Error> {
let array = unsafe { Array::unbox_from_value(&mut ary, interp)? };
// Safety:
//
// Convert separator to an owned byte vec to ensure the `RString` backing
// `joiner` is not garbage collected when invoking `to_s` during `join`.
if let Ok(separator) = unsafe { implicitly_convert_to_string(interp, &mut joiner) } {
let s = array.join(interp, separator)?;
let separator = separator.to_vec();
let s = array.join(interp, &separator)?;
Ok(interp.convert_mut(s))
} else {
let n = implicitly_convert_to_int(interp, joiner)?;
Expand Down
4 changes: 4 additions & 0 deletions artichoke-backend/src/extn/core/kernel/integer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ impl<'a> TryConvertMut<&'a mut Value, IntegerString<'a>> for Artichoke {
message.push_str(self.inspect_type_name_for_value(*value));
message.push_str(" into Integer");

// Safety:
//
// There is no use of an `Artichoke` in this module, which means a
// garbage collection of `value` cannot be triggered.
if let Ok(arg) = unsafe { implicitly_convert_to_string(self, value) } {
if let Some(converted) = IntegerString::from_slice(arg) {
Ok(converted)
Expand Down
34 changes: 28 additions & 6 deletions artichoke-backend/src/extn/core/kernel/require.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,20 @@ use crate::ffi;
use crate::state::parser::Context;

pub fn load(interp: &mut Artichoke, mut filename: Value) -> Result<bool, Error> {
// Safety:
//
// Converting the extracted byte slice to an owned `Vec<u8>` is required to
// prevent a use after free. evaling code on the interpreter with `require`
// may cause a garbage collection which might free the `RString` backing
// `filename`.
let filename = unsafe { implicitly_convert_to_string(interp, &mut filename)? };
if filename.find_byte(b'\0').is_some() {
return Err(ArgumentError::with_message("path name contains null byte").into());
}
let file = ffi::bytes_to_os_str(filename)?;
let filename = filename.to_vec();
let file = ffi::bytes_to_os_str(&filename)?;
let path = Path::new(file);

if let Some(mut context) = interp.resolve_source_path(&path)? {
for byte in &mut context {
if *byte == b'\\' {
Expand All @@ -29,16 +37,23 @@ pub fn load(interp: &mut Artichoke, mut filename: Value) -> Result<bool, Error>
return result;
}
let mut message = b"cannot load such file -- ".to_vec();
message.extend_from_slice(filename);
message.extend(filename);
Err(LoadError::from(message).into())
}

pub fn require(interp: &mut Artichoke, mut filename: Value) -> Result<bool, Error> {
// Safety:
//
// Converting the extracted byte slice to an owned `Vec<u8>` is required to
// prevent a use after free. evaling code on the interpreter with `require`
// may cause a garbage collection which might free the `RString` backing
// `filename`.
let filename = unsafe { implicitly_convert_to_string(interp, &mut filename)? };
if filename.find_byte(b'\0').is_some() {
return Err(ArgumentError::with_message("path name contains null byte").into());
}
let file = ffi::bytes_to_os_str(filename)?;
let filename = filename.to_vec();
let file = ffi::bytes_to_os_str(&filename)?;
let path = Path::new(file);

if let Some(mut context) = interp.resolve_source_path(&path)? {
Expand All @@ -55,17 +70,24 @@ pub fn require(interp: &mut Artichoke, mut filename: Value) -> Result<bool, Erro
return result;
}
let mut message = b"cannot load such file -- ".to_vec();
message.extend_from_slice(filename);
message.extend(filename);
Err(LoadError::from(message).into())
}

#[allow(clippy::module_name_repetitions)]
pub fn require_relative(interp: &mut Artichoke, mut filename: Value, base: RelativePath) -> Result<bool, Error> {
// Safety:
//
// Converting the extracted byte slice to an owned `Vec<u8>` is required to
// prevent a use after free. evaling code on the interpreter with `require`
// may cause a garbage collection which might free the `RString` backing
// `filename`.
let filename = unsafe { implicitly_convert_to_string(interp, &mut filename)? };
if filename.find_byte(b'\0').is_some() {
return Err(ArgumentError::with_message("path name contains null byte").into());
}
let file = ffi::bytes_to_os_str(filename)?;
let filename = filename.to_vec();
let file = ffi::bytes_to_os_str(&filename)?;
let path = base.join(Path::new(file));

if let Some(mut context) = interp.resolve_source_path(&path)? {
Expand All @@ -82,7 +104,7 @@ pub fn require_relative(interp: &mut Artichoke, mut filename: Value, base: Relat
return result;
}
let mut message = b"cannot load such file -- ".to_vec();
message.extend_from_slice(filename);
message.extend(filename);
Err(LoadError::from(message).into())
}

Expand Down
6 changes: 5 additions & 1 deletion artichoke-backend/src/extn/core/kernel/trampoline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@ use crate::extn::prelude::*;

pub fn integer(interp: &mut Artichoke, mut arg: Value, base: Option<Value>) -> Result<Value, Error> {
let base = base.and_then(|base| interp.convert(base));
let arg = interp.try_convert_mut(&mut arg)?;
// Safety:
//
// Extract the `Copy` radix integer first since implicit conversions can
// trigger garbage collections.
let base = interp.try_convert_mut(base)?;
let arg = interp.try_convert_mut(&mut arg)?;
let integer = kernel::integer::method(arg, base)?;
Ok(interp.convert(integer))
}
Expand Down
19 changes: 18 additions & 1 deletion artichoke-backend/src/extn/core/regexp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ impl Regexp {
}
regexp.inner().source().clone()
} else {
// Safety:
//
// `bytes` is converted to an owned byte vec before any additional
// operations are run on the interpreter which might trigger a
// garbage collection of `pattern` and its backing `RString`.
let bytes = unsafe { implicitly_convert_to_string(interp, &mut pattern)? };
Source::with_pattern_and_options(bytes.to_vec(), options.unwrap_or_default())
};
Expand All @@ -130,6 +135,12 @@ impl Regexp {
let source = regexp.inner().config();
Ok(source.pattern().to_vec())
} else {
// Safety:
//
// `bytes` is converted to an owned `String` before any
// additional operations are run on the interpreter which might
// trigger a garbage collection of `pattern` and its backing
// `RString`.
let bytes = unsafe { implicitly_convert_to_string(interp, value)? };
if let Ok(pattern) = str::from_utf8(bytes) {
Ok(syntax::escape(pattern).into_bytes())
Expand Down Expand Up @@ -183,7 +194,13 @@ impl Regexp {
pattern_vec = symbol.bytes(interp).to_vec();
pattern_vec.as_slice()
} else if let Ok(pattern) = unsafe { implicitly_convert_to_string(interp, &mut other) } {
pattern
// Safety:
//
// `pattern` is converted to an owned byte vec before any
// intervening operations on the VM which may trigger a garbage
// collection of the `RString` that backs `other`.
pattern_vec = pattern.to_vec();
pattern_vec.as_slice()
} else {
interp.unset_global_variable(LAST_MATCH)?;
return Ok(false);
Expand Down
7 changes: 6 additions & 1 deletion artichoke-backend/src/extn/core/regexp/trampoline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ pub fn escape(interp: &mut Artichoke, mut pattern: Value) -> Result<Value, Error
pattern_vec = symbol.bytes(interp).to_vec();
pattern_vec.as_slice()
} else {
unsafe { implicitly_convert_to_string(interp, &mut pattern)? }
// Safety:
//
// Convert the bytes to an owned vec to prevent the underlying `RString`
// backing `pattern` from being freed during a garbage collection.
pattern_vec = unsafe { implicitly_convert_to_string(interp, &mut pattern)?.to_vec() };
pattern_vec.as_slice()
};
let pattern = Regexp::escape(pattern)?;
Ok(interp.convert_mut(pattern))
Expand Down
40 changes: 26 additions & 14 deletions artichoke-backend/src/extn/core/string/trampoline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,32 +37,38 @@ pub fn scan(interp: &mut Artichoke, value: Value, mut pattern: Value, block: Opt
return Ok(interp.try_convert_mut(scan)?.unwrap_or(value));
}
#[cfg(feature = "core-regexp")]
// Safety:
//
// Convert `pattern_bytes` to an owned byte vec to ensure the underlying
// `RString` is not garbage collected when yielding matches.
if let Ok(pattern_bytes) = unsafe { implicitly_convert_to_string(interp, &mut pattern) } {
let pattern_bytes = pattern_bytes.to_vec();

let string = value.try_into_mut::<&[u8]>(interp)?;
if let Some(ref block) = block {
let regex = Regexp::lazy(pattern_bytes.to_vec());
let regex = Regexp::lazy(pattern_bytes.clone());
let matchdata = MatchData::new(string.to_vec(), regex, ..);
let patlen = pattern_bytes.len();
if let Some(pos) = string.find(pattern_bytes) {
if let Some(pos) = string.find(&pattern_bytes) {
let mut data = matchdata.clone();
data.set_region(pos..pos + patlen);
let data = MatchData::alloc_value(data, interp)?;
interp.set_global_variable(regexp::LAST_MATCH, &data)?;

let block_arg = interp.convert_mut(pattern_bytes);
let block_arg = interp.convert_mut(pattern_bytes.as_slice());
let _ = block.yield_arg(interp, &block_arg)?;

interp.set_global_variable(regexp::LAST_MATCH, &data)?;

let offset = pos + patlen;
let string = string.get(offset..).unwrap_or_default();
for pos in string.find_iter(pattern_bytes) {
for pos in string.find_iter(&pattern_bytes) {
let mut data = matchdata.clone();
data.set_region(offset + pos..offset + pos + patlen);
let data = MatchData::alloc_value(data, interp)?;
interp.set_global_variable(regexp::LAST_MATCH, &data)?;

let block_arg = interp.convert_mut(pattern_bytes);
let block_arg = interp.convert_mut(pattern_bytes.as_slice());
let _ = block.yield_arg(interp, &block_arg)?;

interp.set_global_variable(regexp::LAST_MATCH, &data)?;
Expand All @@ -73,17 +79,17 @@ pub fn scan(interp: &mut Artichoke, value: Value, mut pattern: Value, block: Opt
return Ok(value);
}
let (matches, last_pos) = string
.find_iter(pattern_bytes)
.find_iter(&pattern_bytes)
.enumerate()
.last()
.map(|(m, p)| (m + 1, p))
.unwrap_or_default();
let mut result = Vec::with_capacity(matches);
for _ in 0..matches {
result.push(interp.convert_mut(pattern_bytes));
result.push(interp.convert_mut(pattern_bytes.as_slice()));
}
if matches > 0 {
let regex = Regexp::lazy(pattern_bytes.to_vec());
let regex = Regexp::lazy(pattern_bytes.clone());
let matchdata = MatchData::new(string.to_vec(), regex, last_pos..last_pos + pattern_bytes.len());
let data = MatchData::alloc_value(matchdata, interp)?;
interp.set_global_variable(regexp::LAST_MATCH, &data)?;
Expand All @@ -93,32 +99,38 @@ pub fn scan(interp: &mut Artichoke, value: Value, mut pattern: Value, block: Opt
return interp.try_convert_mut(result);
}
#[cfg(not(feature = "core-regexp"))]
// Safety:
//
// Convert `pattern_bytes` to an owned byte vec to ensure the underlying
// `RString` is not garbage collected when yielding matches.
if let Ok(pattern_bytes) = unsafe { implicitly_convert_to_string(interp, &mut pattern) } {
let pattern_bytes = pattern_bytes.to_vec();

let string = value.try_into_mut::<&[u8]>(interp)?;
if let Some(ref block) = block {
let patlen = pattern_bytes.len();
if let Some(pos) = string.find(pattern_bytes) {
let block_arg = interp.convert_mut(pattern_bytes);
if let Some(pos) = string.find(&pattern_bytes) {
let block_arg = interp.convert_mut(pattern_bytes.as_slice());
let _ = block.yield_arg(interp, &block_arg)?;

let offset = pos + patlen;
let string = string.get(offset..).unwrap_or_default();
for _ in string.find_iter(pattern_bytes) {
let block_arg = interp.convert_mut(pattern_bytes);
for _ in string.find_iter(&pattern_bytes) {
let block_arg = interp.convert_mut(pattern_bytes.as_slice());
let _ = block.yield_arg(interp, &block_arg)?;
}
}
return Ok(value);
}
let matches = string
.find_iter(pattern_bytes)
.find_iter(&pattern_bytes)
.enumerate()
.last()
.map(|(m, _)| m + 1)
.unwrap_or_default();
let mut result = Vec::with_capacity(matches);
for _ in 0..matches {
result.push(interp.convert_mut(pattern_bytes));
result.push(interp.convert_mut(pattern_bytes.as_slice()));
}
return interp.try_convert_mut(result);
}
Expand Down

0 comments on commit 692a45d

Please sign in to comment.