Skip to content

Commit

Permalink
analyze: add NON_NULL rewrites (#1095)
Browse files Browse the repository at this point in the history
This branch implements rewriting of nullable pointers (those lacking
`PermissionSet::NON_NULL`) to `Option`. This includes the following
kinds of rewrites:

* Type annotations: `Option<&mut T>` instead of `&mut T`
* Casts between nullable and non-nullable via `p.unwrap()` and
`Some(p)`. For most permissions, we implement only one direction because
the other is invalid/nonsensical, but here we implement both in order to
support "unsound" rewrites involving overridden `NON_NULL` flags (as in
#1088).
* Borrows: when casting `p: Option<&mut T>`, we use
`p.as_deref_mut().unwrap()` instead of `p.unwrap()` to avoid consuming
the original `p`. (In the code, this is called a "downgrade", since it
allows borrowing `Option<Box<T>>` as `Option<&T>` and similar.)
* Pointer projections on nullable pointers. Where `NON_NULL` pointers
would use `&p[0]`, nullable ones use `p.map(|ptr| &ptr[0])`. Internally,
this is represented similar to `Some(&p.unwrap()[0])`, but it's handled
specially by `rewrite::expr::convert` to produce a `map` call instead,
which passes through `None` without a panic.
* `unwrap()` calls on derefs. `*p` is rewritten to `*p.unwrap()`, or to
`*p.as_deref().unwrap()` if a downgrade/borrow is necessary to avoid
moving `p`.
* `ptr::null()` and `0 as *const _` to `None`, and `p.is_null()` to
`p.is_none()`.

The new `non_null_rewrites.rs` test case passes, and the rewritten code
compiles.
  • Loading branch information
spernsteiner committed Jun 25, 2024
2 parents 9ad9d35 + 73652be commit e526781
Show file tree
Hide file tree
Showing 12 changed files with 1,067 additions and 285 deletions.
4 changes: 2 additions & 2 deletions c2rust-analyze/src/dataflow/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::pointee_type::PointeeTypes;
use crate::pointer_id::PointerTable;
use crate::recent_writes::RecentWrites;
use crate::util::{
describe_rvalue, is_null_const, is_transmutable_ptr_cast, ty_callee, Callee, RvalueDesc,
self, describe_rvalue, is_transmutable_ptr_cast, ty_callee, Callee, RvalueDesc,
UnknownDefCallee,
};
use assert_matches::assert_matches;
Expand Down Expand Up @@ -120,7 +120,7 @@ impl<'tcx> TypeChecker<'tcx, '_> {
CastKind::PointerFromExposedAddress => {
// We support only one case here, which is the case of null pointers
// constructed via casts such as `0 as *const T`
if !op.constant().copied().map(is_null_const).unwrap_or(false) {
if !util::is_null_const_operand(op) {
panic!("Creating non-null pointers from exposed addresses not supported");
}
// The target type of the cast must not have `NON_NULL` permission.
Expand Down
7 changes: 7 additions & 0 deletions c2rust-analyze/src/rewrite/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,13 @@ impl<S: Sink> Emitter<'_, S> {
self.emit(rw, 0)
}

Rewrite::Closure1(ref name, ref rw) => {
self.emit_str("|")?;
self.emit_str(name)?;
self.emit_str("| ")?;
self.emit(rw, 0)
}

Rewrite::TyPtr(ref rw, mutbl) => {
match mutbl {
Mutability::Not => self.emit_str("*const ")?,
Expand Down
476 changes: 328 additions & 148 deletions c2rust-analyze/src/rewrite/expr/convert.rs

Large diffs are not rendered by default.

277 changes: 254 additions & 23 deletions c2rust-analyze/src/rewrite/expr/mir_op.rs

Large diffs are not rendered by default.

262 changes: 225 additions & 37 deletions c2rust-analyze/src/rewrite/expr/unlower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,10 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> {
}

fn operand_desc(&self, op: &Operand<'tcx>) -> MirOriginDesc {
match *op {
mir::Operand::Copy(pl) | mir::Operand::Move(pl) => {
if is_temp_var(self.mir, pl.as_ref()) {
MirOriginDesc::LoadFromTemp
} else {
MirOriginDesc::Expr
}
}
mir::Operand::Constant(..) => MirOriginDesc::Expr,
if is_temp_var_operand(self.mir, op) {
MirOriginDesc::LoadFromTemp
} else {
MirOriginDesc::Expr
}
}

Expand Down Expand Up @@ -215,20 +210,16 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> {
match ex.kind {
hir::ExprKind::Assign(pl, rv, _span) => {
// For `Assign`, we expect the assignment to be the whole thing.
let (loc, _mir_pl, mir_rv) = match self.get_sole_assign(&locs) {
let (loc, mir_pl, mir_rv) = match self.get_sole_assign(&locs) {
Some(x) => x,
None => {
warn("expected exactly one StatementKind::Assign");
return;
}
};
let desc = match mir_rv {
mir::Rvalue::Use(op) => self.operand_desc(op),
_ => MirOriginDesc::Expr,
};
self.record(loc, &[], ex);
self.record(loc, &[SubLoc::Dest], pl);
self.record_desc(loc, &[SubLoc::Rvalue], rv, desc);
self.visit_expr_place(pl, loc, vec![SubLoc::Dest], mir_pl, &[]);
self.visit_expr_rvalue(rv, loc, vec![SubLoc::Rvalue], mir_rv, &[]);
}

hir::ExprKind::Call(_, args) | hir::ExprKind::MethodCall(_, args, _) => {
Expand Down Expand Up @@ -275,8 +266,9 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> {

self.record(loc, &[SubLoc::Rvalue], ex);
for (i, (arg, mir_arg)) in args.iter().zip(mir_args).enumerate() {
self.record_operand(loc, &[SubLoc::Rvalue, SubLoc::CallArg(i)], arg, mir_arg);
self.visit_expr_operand(arg, mir_arg, &[]);
let sub_loc = vec![SubLoc::Rvalue, SubLoc::CallArg(i)];
self.record_operand(loc, &sub_loc, arg, mir_arg);
self.visit_expr_operand(arg, loc, sub_loc, mir_arg, &[]);
}

if extra_locs.len() > 0 {
Expand Down Expand Up @@ -306,8 +298,7 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> {
}
};
self.record_desc(cursor.loc, &[], ex, MirOriginDesc::StoreIntoLocal);
self.peel_adjustments(ex, &mut cursor);
self.record_desc(cursor.loc, &cursor.sub_loc, ex, MirOriginDesc::Expr);
self.walk_expr(ex, &mut cursor);
self.finish_visit_expr_cursor(ex, cursor);
}
}
Expand Down Expand Up @@ -436,14 +427,124 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> {
}
}

fn visit_expr_rvalue(
&mut self,
ex: &'tcx hir::Expr<'tcx>,
loc: Location,
sub_loc: Vec<SubLoc>,
rv: &'a mir::Rvalue<'tcx>,
extra_locs: &[Location],
) {
let _g = panic_detail::set_current_span(ex.span);

// TODO: We do this check early to ensure that the `LoadFromTemp` is emitted with subloc
// `[Rvalue]` rather than `[Rvalue, RvalueOperand(0)]`, since `mir_op` emits rewrites with
// just `[Rvalue]`. For `Rvalue::Use`, the rvalue and its operand are basically
// synonymous, so ideally we would accept both sublocs as well. We should probably add an
// aliasing system or some kind of cleverness in `distribute` to make both versions work,
// or else modify the definition of `SubLoc` so there's only one way to express this
// location.
if is_temp_var_rvalue(self.mir, rv) {
self.record_desc(loc, &sub_loc, ex, MirOriginDesc::LoadFromTemp);
return;
}

let mut cursor = VisitExprCursor::new(
self.mir,
ExprMir::Rvalue(rv),
extra_locs,
loc,
sub_loc.to_owned(),
);
self.walk_expr(ex, &mut cursor);

self.finish_visit_expr_cursor(ex, cursor);
}

fn visit_expr_operand(
&mut self,
ex: &'tcx hir::Expr<'tcx>,
_rv: &'a mir::Operand<'tcx>,
_extra_locs: &[Location],
loc: Location,
sub_loc: Vec<SubLoc>,
op: &'a mir::Operand<'tcx>,
extra_locs: &[Location],
) {
let _g = panic_detail::set_current_span(ex.span);

if is_temp_var_operand(self.mir, op) {
self.record_desc(loc, &sub_loc, ex, MirOriginDesc::LoadFromTemp);
return;
}

let mut cursor = VisitExprCursor::new(
self.mir,
ExprMir::Operand(op),
extra_locs,
loc,
sub_loc.to_owned(),
);
self.walk_expr(ex, &mut cursor);

self.finish_visit_expr_cursor(ex, cursor);
}

fn visit_expr_place(
&mut self,
ex: &'tcx hir::Expr<'tcx>,
loc: Location,
sub_loc: Vec<SubLoc>,
pl: mir::Place<'tcx>,
extra_locs: &[Location],
) {
let _g = panic_detail::set_current_span(ex.span);
// TODO: handle adjustments, overloaded operators, etc

let mut cursor = VisitExprCursor::new(
self.mir,
ExprMir::Place(pl.as_ref()),
extra_locs,
loc,
sub_loc.to_owned(),
);
self.walk_expr(ex, &mut cursor);

self.finish_visit_expr_cursor(ex, cursor);
}

fn walk_expr(&mut self, ex: &'tcx hir::Expr<'tcx>, cursor: &mut VisitExprCursor<'a, '_, 'tcx>) {
let mut ex = ex;
loop {
self.peel_adjustments(ex, cursor);
if cursor.is_temp_var() {
self.record_desc(cursor.loc, &cursor.sub_loc, ex, MirOriginDesc::LoadFromTemp);
break;
} else {
self.record_desc(cursor.loc, &cursor.sub_loc, ex, MirOriginDesc::Expr);
}

match ex.kind {
hir::ExprKind::Unary(hir::UnOp::Deref, ptr_ex) => {
if let Some(()) = cursor.peel_deref() {
ex = ptr_ex;
continue;
}
}
hir::ExprKind::Field(base_ex, _field_ident) => {
if let Some(()) = cursor.peel_field() {
ex = base_ex;
continue;
}
}
hir::ExprKind::Index(arr_ex, _idx_ex) => {
if let Some(()) = cursor.peel_index() {
ex = arr_ex;
continue;
}
}
_ => {}
}
// Keep looping only in cases that we specifically recognize.
break;
}
}
}

Expand Down Expand Up @@ -526,6 +627,16 @@ impl<'a, 'b, 'tcx> VisitExprCursor<'a, 'b, 'tcx> {
}
}

/// Check whether the current MIR is a temporary.
pub fn is_temp_var(&self) -> bool {
match self.cur {
ExprMir::Rvalue(rv) => is_temp_var_rvalue(self.mir, rv),
ExprMir::Operand(op) => is_temp_var_operand(self.mir, op),
ExprMir::Place(pl) => is_temp_var(self.mir, pl),
ExprMir::Call(_) => false,
}
}

/// If the current MIR is a temporary, and the previous `Location` is an assignment to
/// that temporary, peel it off, leaving the temporary's initializer as the current
/// `Rvalue`. This also adds `LoadFromTemp` and `StoreIntoLocal` entries in `self.temp_info`
Expand Down Expand Up @@ -673,12 +784,37 @@ impl<'a, 'b, 'tcx> VisitExprCursor<'a, 'b, 'tcx> {
if let Some((&outer_proj, remaining_proj)) = pl.projection.split_last() {
if matches!(outer_proj, mir::PlaceElem::Deref) {
pl.projection = remaining_proj;
// Number of derefs, not counting the outermost one we just peeled off.
let num_inner_derefs = remaining_proj
.iter()
.filter(|p| matches!(p, mir::PlaceElem::Deref))
.count();
self.sub_loc.push(SubLoc::PlacePointer(num_inner_derefs));
self.sub_loc.push(SubLoc::PlaceDerefPointer);
return Some(());
}
}
self.peel_temp()?;
}
}

/// If the current MIR is a `Place` ending with a `Field` projection, peel off the `Field`.
pub fn peel_field(&mut self) -> Option<()> {
loop {
let pl = self.require_place_mut()?;
if let Some((&outer_proj, remaining_proj)) = pl.projection.split_last() {
if matches!(outer_proj, mir::PlaceElem::Field(_idx, _ty)) {
pl.projection = remaining_proj;
self.sub_loc.push(SubLoc::PlaceFieldBase);
return Some(());
}
}
self.peel_temp()?;
}
}

/// If the current MIR is a `Place` ending with an `Index` projection, peel off the `Index`.
pub fn peel_index(&mut self) -> Option<()> {
loop {
let pl = self.require_place_mut()?;
if let Some((&outer_proj, remaining_proj)) = pl.projection.split_last() {
if matches!(outer_proj, mir::PlaceElem::Index(_)) {
pl.projection = remaining_proj;
self.sub_loc.push(SubLoc::PlaceIndexArray);
return Some(());
}
}
Expand Down Expand Up @@ -712,25 +848,77 @@ fn is_temp_var(mir: &Body, pl: mir::PlaceRef) -> bool {
pl.projection.len() == 0 && mir.local_kind(pl.local) == mir::LocalKind::Temp
}

fn is_temp_var_operand(mir: &Body, op: &mir::Operand) -> bool {
match get_operand_place(op) {
Some(pl) => is_temp_var(mir, pl.as_ref()),
None => false,
}
}

fn is_temp_var_rvalue(mir: &Body, rv: &mir::Rvalue) -> bool {
match get_rvalue_operand(rv) {
Some(op) => is_temp_var_operand(mir, op),
None => false,
}
}

fn get_rvalue_operand<'a, 'tcx>(rv: &'a mir::Rvalue<'tcx>) -> Option<&'a mir::Operand<'tcx>> {
match *rv {
mir::Rvalue::Use(ref op) => Some(op),
_ => None,
}
}

fn get_operand_place<'tcx>(op: &mir::Operand<'tcx>) -> Option<mir::Place<'tcx>> {
match *op {
mir::Operand::Copy(pl) | mir::Operand::Move(pl) => Some(pl),
_ => None,
}
}

/// Indicate whether a given MIR statement should be considered when building the unlowering map.
fn filter_stmt(stmt: &mir::Statement) -> bool {
match stmt.kind {
// Ignore `AscribeUserType` annotations. These appear in the middle of some expressions.
// It's easier to ignore them all at this level rather than try to handle them in all the
// places they might appear.
mir::StatementKind::AscribeUserType(..) => false,
_ => true,
}
}

/// Indicate whether a given MIR terminator should be considered when building the unlowering map.
fn filter_term(term: &mir::Terminator) -> bool {
match term.kind {
_ => true,
}
}

fn build_span_index(mir: &Body<'_>) -> SpanIndex<Location> {
eprintln!("building span index for {:?}:", mir.source);
debug!("building span index for {:?}:", mir.source);
let mut span_index_items = Vec::new();
for (bb, bb_data) in mir.basic_blocks().iter_enumerated() {
for (i, stmt) in bb_data.statements.iter().enumerate() {
if !filter_stmt(stmt) {
continue;
}
let loc = Location {
block: bb,
statement_index: i,
};
eprintln!(" {:?}: {:?}", loc, stmt.source_info.span);
debug!(" {:?}: {:?}", loc, stmt.source_info.span);
span_index_items.push((stmt.source_info.span, loc));
}

let loc = Location {
block: bb,
statement_index: bb_data.statements.len(),
};
eprintln!(" {:?}: {:?}", loc, bb_data.terminator().source_info.span);
span_index_items.push((bb_data.terminator().source_info.span, loc));
let term = bb_data.terminator();
if filter_term(term) {
let loc = Location {
block: bb,
statement_index: bb_data.statements.len(),
};
debug!(" {:?}: {:?}", loc, term.source_info.span);
span_index_items.push((term.source_info.span, loc));
}
}

SpanIndex::new(span_index_items)
Expand Down
4 changes: 4 additions & 0 deletions c2rust-analyze/src/rewrite/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ pub enum Rewrite<S = Span> {
/// Single-variable `let` binding. This has the same scoping issues as multi-variable `Let`;
/// because of this, `Let` should generally be used instead of multiple `Let1`s.
Let1(String, Box<Rewrite>),
/// Single-argument closure. As with `Let` and `Let1`, the body must be carefully constructed
/// to avoid potential shadowing.
Closure1(String, Box<Rewrite>),

// Type builders
/// Emit a complete pretty-printed type, discarding the original annotation.
Expand Down Expand Up @@ -196,6 +199,7 @@ impl Rewrite {
Let(new_vars)
}
Let1(ref name, ref rw) => Let1(String::clone(name), try_subst(rw)?),
Closure1(ref name, ref rw) => Closure1(String::clone(name), try_subst(rw)?),

Print(ref s) => Print(String::clone(s)),
TyPtr(ref rw, mutbl) => TyPtr(try_subst(rw)?, mutbl),
Expand Down
Loading

0 comments on commit e526781

Please sign in to comment.