From 084e520311c584a54f694e2682c1025c5915484d Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 3 May 2024 11:33:32 -0700 Subject: [PATCH 01/30] analyze: add TypeDesc::option flag --- c2rust-analyze/src/rewrite/expr/mir_op.rs | 6 ++ c2rust-analyze/src/type_desc.rs | 84 +++++++++++++++-------- 2 files changed, 63 insertions(+), 27 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index e5b391752..11c0a8897 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -627,6 +627,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { Quantity::OffsetPtr => Quantity::OffsetPtr, Quantity::Array => unreachable!("perms_to_desc should not return Quantity::Array"), }, + option: result_desc.option, pointee_ty: result_desc.pointee_ty, }; @@ -635,6 +636,8 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { // Emit `OffsetSlice` for the offset itself. let mutbl = matches!(result_desc.own, Ownership::Mut); + // TODO: need to support an `Option`-compatible version of `OffsetSlice` + // e.g. `{ let offset = <...>; p.map(|p| &p[offset..]) } v.emit(RewriteKind::OffsetSlice { mutbl }); // The `OffsetSlice` operation returns something of the same type as its input. @@ -797,6 +800,9 @@ where // Overwriting `from.pointee_ty` allows the final `from == to` check to succeed below. from.pointee_ty = to.pointee_ty; + // FIXME: checking `option` flags is disabled for now (not yet implemented) + from.option = to.option; + if from == to { return Ok(()); } diff --git a/c2rust-analyze/src/type_desc.rs b/c2rust-analyze/src/type_desc.rs index 44526f305..8c4a201a0 100644 --- a/c2rust-analyze/src/type_desc.rs +++ b/c2rust-analyze/src/type_desc.rs @@ -39,10 +39,42 @@ pub enum Quantity { pub struct TypeDesc<'tcx> { pub own: Ownership, pub qty: Quantity, + pub option: bool, pub pointee_ty: Ty<'tcx>, } -fn perms_to_own_and_qty(perms: PermissionSet, flags: FlagSet) -> (Ownership, Quantity) { +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub struct PtrDesc { + pub own: Ownership, + pub qty: Quantity, + pub option: bool, +} + +impl<'tcx> From> for PtrDesc { + fn from(x: TypeDesc<'tcx>) -> PtrDesc { + let TypeDesc { + own, + qty, + option, + pointee_ty: _, + } = x; + PtrDesc { own, qty, option } + } +} + +impl PtrDesc { + pub fn to_type_desc<'tcx>(self, pointee_ty: Ty<'tcx>) -> TypeDesc<'tcx> { + let PtrDesc { own, qty, option } = self; + TypeDesc { + own, + qty, + option, + pointee_ty, + } + } +} + +fn perms_to_ptr_desc(perms: PermissionSet, flags: FlagSet) -> PtrDesc { let own = if perms.contains(PermissionSet::UNIQUE | PermissionSet::WRITE) { Ownership::Mut } else if flags.contains(FlagSet::CELL) { @@ -61,7 +93,9 @@ fn perms_to_own_and_qty(perms: PermissionSet, flags: FlagSet) -> (Ownership, Qua Quantity::Single }; - (own, qty) + let option = !perms.contains(PermissionSet::NON_NULL); + + PtrDesc { own, qty, option } } /// Obtain the `TypeDesc` for a pointer. `ptr_ty` should be the `Ty` of the pointer, and `perms` @@ -73,7 +107,7 @@ pub fn perms_to_desc(ptr_ty: Ty, perms: PermissionSet, flags: FlagSet) -> TypeDe "building TypeDesc for FIXED pointer requires a related pointee type" ); - let (own, qty) = perms_to_own_and_qty(perms, flags); + let ptr_desc = perms_to_ptr_desc(perms, flags); let pointee_ty = match *ptr_ty.kind() { TyKind::Ref(_, ty, _) => ty, @@ -83,23 +117,15 @@ pub fn perms_to_desc(ptr_ty: Ty, perms: PermissionSet, flags: FlagSet) -> TypeDe _ => panic!("expected a pointer type, but got {:?}", ptr_ty), }; - TypeDesc { - own, - qty, - pointee_ty, - } + ptr_desc.to_type_desc(pointee_ty) } /// Obtain the `TypeDesc` for a pointer to a local. `local_ty` should be the `Ty` of the local /// itself, and `perms` and `flags` should be taken from its `addr_of_local` `PointerId`. pub fn local_perms_to_desc(local_ty: Ty, perms: PermissionSet, flags: FlagSet) -> TypeDesc { - let (own, qty) = perms_to_own_and_qty(perms, flags); + let ptr_desc = perms_to_ptr_desc(perms, flags); let pointee_ty = local_ty; - TypeDesc { - own, - qty, - pointee_ty, - } + ptr_desc.to_type_desc(pointee_ty) } pub fn perms_to_desc_with_pointee<'tcx>( @@ -109,26 +135,18 @@ pub fn perms_to_desc_with_pointee<'tcx>( perms: PermissionSet, flags: FlagSet, ) -> TypeDesc<'tcx> { - let (own, qty) = if flags.contains(FlagSet::FIXED) { + let ptr_desc = if flags.contains(FlagSet::FIXED) { unpack_pointer_type(tcx, ptr_ty, pointee_ty) } else { - perms_to_own_and_qty(perms, flags) + perms_to_ptr_desc(perms, flags) }; - TypeDesc { - own, - qty, - pointee_ty, - } + ptr_desc.to_type_desc(pointee_ty) } /// Unpack an existing `Ty` into its ownership and quantity. The pointee type must already be /// known. Panics if there are no `Ownership` and `Quantity` that combine with `pointee_ty` to /// produce `ty`. -pub fn unpack_pointer_type<'tcx>( - tcx: TyCtxt<'tcx>, - ty: Ty<'tcx>, - pointee_ty: Ty<'tcx>, -) -> (Ownership, Quantity) { +pub fn unpack_pointer_type<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>, pointee_ty: Ty<'tcx>) -> PtrDesc { #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] enum Step { Ref(Mutability), @@ -139,6 +157,7 @@ pub fn unpack_pointer_type<'tcx>( Slice, OffsetPtr, Array, + Option, } let mut steps = Vec::new(); @@ -152,6 +171,9 @@ pub fn unpack_pointer_type<'tcx>( TyKind::Adt(adt_def, substs) if is_cell(tcx, adt_def) => { (Step::Cell, substs.type_at(0)) } + TyKind::Adt(adt_def, substs) if is_option(tcx, adt_def) => { + (Step::Option, substs.type_at(0)) + } TyKind::Adt(adt_def, substs) if is_offset_ptr(tcx, adt_def) => { (Step::OffsetPtr, substs.type_at(0)) } @@ -181,6 +203,8 @@ pub fn unpack_pointer_type<'tcx>( }; // This logic is roughly the inverse of that in `rewrite::ty::mk_rewritten_ty`. + let option = eat(Step::Option); + let mut own = if eat(Step::Ref(Mutability::Not)) { Ownership::Imm } else if eat(Step::Ref(Mutability::Mut)) { @@ -226,7 +250,7 @@ pub fn unpack_pointer_type<'tcx>( &steps[i..], ); - (own, qty) + PtrDesc { own, qty, option } } /// Returns `true` if `adt_def` is the type `std::cell::Cell`. @@ -235,6 +259,12 @@ fn is_cell<'tcx>(_tcx: TyCtxt<'tcx>, _adt_def: AdtDef<'tcx>) -> bool { false } +/// Returns `true` if `adt_def` is the type `std::option::Option`. +fn is_option<'tcx>(_tcx: TyCtxt<'tcx>, _adt_def: AdtDef<'tcx>) -> bool { + // TODO + false +} + /// Returns `true` if `adt_def` is the type `std::rc::Rc`. fn is_rc<'tcx>(_tcx: TyCtxt<'tcx>, _adt_def: AdtDef<'tcx>) -> bool { // TODO From c0ddd30d310121342b1fdd60f19d9659cadf2396 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 3 May 2024 13:19:06 -0700 Subject: [PATCH 02/30] analyze: implement Option `p.unwrap()` and `Some(p)` casts --- c2rust-analyze/src/rewrite/expr/convert.rs | 9 ++++++++ c2rust-analyze/src/rewrite/expr/mir_op.rs | 27 +++++++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 164af12cc..97568211c 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -470,6 +470,15 @@ pub fn convert_cast_rewrite(kind: &mir_op::RewriteKind, hir_rw: Rewrite) -> Rewr Rewrite::Ref(Box::new(place), hir::Mutability::Not) } + mir_op::RewriteKind::OptionUnwrap => { + // `p` -> `p.unwrap()` + Rewrite::MethodCall("unwrap".to_string(), Box::new(hir_rw), vec![]) + } + mir_op::RewriteKind::OptionSome => { + // `p` -> `Some(p)` + Rewrite::Call("std::option::Option::Some".to_string(), vec![hir_rw]) + } + mir_op::RewriteKind::CastRefToRaw { mutbl } => { // `addr_of!(*p)` is cleaner than `p as *const _`; we don't know the pointee // type here, so we can't emit `p as *const T`. diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 11c0a8897..51a071307 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -78,6 +78,11 @@ pub enum RewriteKind { dest_single: bool, }, + /// Convert `Option` to `T` by calling `.unwrap()`. + OptionUnwrap, + /// Convert `T` to `Option` by wrapping the value in `Some`. + OptionSome, + /// Cast `&T` to `*const T` or `&mut T` to `*mut T`. CastRefToRaw { mutbl: bool }, /// Cast `*const T` to `*mut T` or vice versa. If `to_mutbl` is true, we are casting to @@ -800,13 +805,23 @@ where // Overwriting `from.pointee_ty` allows the final `from == to` check to succeed below. from.pointee_ty = to.pointee_ty; - // FIXME: checking `option` flags is disabled for now (not yet implemented) - from.option = to.option; - if from == to { return Ok(()); } + if from.option && !to.option { + // Unwrap first, then perform remaining casts. + // TODO: Need to use `.as_ref()`/`.as_mut()` in some cases to avoid moving `from`. + // Which one to use depends on the ownership of `from` and `to`. + (self.emit)(RewriteKind::OptionUnwrap); + from.option = false; + } + + if from.option && to.option { + // FIXME: mapping casts over an `Option` is not yet implemented + from.option = to.option; + } + // Early `Ownership` casts. We do certain casts here in hopes of reaching an `Ownership` // on which we can safely adjust `Quantity`. from.own = self.cast_ownership(from, to, true)?; @@ -861,6 +876,12 @@ where // Late `Ownership` casts. from.own = self.cast_ownership(from, to, false)?; + if !from.option && to.option { + // Wrap at the end, after performing all other steps of the cast. + (self.emit)(RewriteKind::OptionSome); + from.option = true; + } + if from != to { return Err(format!( "unsupported cast kind: {:?} -> {:?} (original input: {:?})", From 2789a2575cd001aab2a5f46175756359b86439ff Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 3 May 2024 14:26:03 -0700 Subject: [PATCH 03/30] analyze: handle ptr::offset calls for nullable pointers --- c2rust-analyze/src/rewrite/apply.rs | 7 ++++++ c2rust-analyze/src/rewrite/expr/convert.rs | 25 +++++++++++++++++++ c2rust-analyze/src/rewrite/expr/mir_op.rs | 10 +++++--- c2rust-analyze/src/rewrite/mod.rs | 4 +++ c2rust-analyze/tests/filecheck.rs | 1 + .../tests/filecheck/non_null_map.rs | 15 +++++++++++ 6 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 c2rust-analyze/tests/filecheck/non_null_map.rs diff --git a/c2rust-analyze/src/rewrite/apply.rs b/c2rust-analyze/src/rewrite/apply.rs index bd9d9d4ad..2f9370a28 100644 --- a/c2rust-analyze/src/rewrite/apply.rs +++ b/c2rust-analyze/src/rewrite/apply.rs @@ -411,6 +411,13 @@ impl Emitter<'_, S> { self.emit(rw, 0) } + Rewrite::Closure1(ref name, ref rw) => { + self.emit_str("|")?; + self.emit_str(name)?; + self.emit_str("| ")?; + self.emit(rw, 0) + } + Rewrite::TyPtr(ref rw, mutbl) => { match mutbl { Mutability::Not => self.emit_str("*const ")?, diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 97568211c..4ea3b66f9 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -163,6 +163,31 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { Rewrite::Ref(Box::new(elem), mutbl_from_bool(mutbl)) } + mir_op::RewriteKind::OptionMapOffsetSlice { mutbl } => { + // `p.offset(i)` -> `p.as_ref().map(|p| &p[i as usize ..])` + assert!(matches!(hir_rw, Rewrite::Identity)); + + // Build let binding + let arr = self.get_subexpr(ex, 0); + let idx = Rewrite::Cast( + Box::new(self.get_subexpr(ex, 1)), + Box::new(Rewrite::Print("usize".to_owned())), + ); + let rw_let = Rewrite::Let(vec![("arr".into(), arr), ("idx".into(), idx)]); + let arr = Rewrite::Text("arr".into()); + let idx = Rewrite::Text("idx".into()); + + // Build closure + let elem = + Rewrite::SliceRange(Box::new(arr.clone()), Some(Box::new(idx)), None); + let ref_elem = Rewrite::Ref(Box::new(elem), mutbl_from_bool(mutbl)); + let closure = Rewrite::Closure1("arr".into(), Box::new(ref_elem)); + + // Call `Option::map` + let call = Rewrite::MethodCall("map".into(), Box::new(arr), vec![closure]); + Rewrite::Block(vec![rw_let], Some(Box::new(call))) + } + mir_op::RewriteKind::RemoveAsPtr => { // `slice.as_ptr()` -> `slice` assert!(matches!(hir_rw, Rewrite::Identity)); diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 51a071307..1cfcd42a7 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -49,6 +49,8 @@ pub enum SubLoc { pub enum RewriteKind { /// Replace `ptr.offset(i)` with something like `&ptr[i..]`. OffsetSlice { mutbl: bool }, + /// Replace `ptr.offset(i)` with something like `ptr.as_ref().map(|p| &p[i..])`. + OptionMapOffsetSlice { mutbl: bool }, /// Replace `slice` with `&slice[0]`. SliceFirst { mutbl: bool }, /// Replace `ptr` with `&*ptr`, converting `&mut T` to `&T`. @@ -641,9 +643,11 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { // Emit `OffsetSlice` for the offset itself. let mutbl = matches!(result_desc.own, Ownership::Mut); - // TODO: need to support an `Option`-compatible version of `OffsetSlice` - // e.g. `{ let offset = <...>; p.map(|p| &p[offset..]) } - v.emit(RewriteKind::OffsetSlice { mutbl }); + if !result_desc.option { + v.emit(RewriteKind::OffsetSlice { mutbl }); + } else { + v.emit(RewriteKind::OptionMapOffsetSlice { mutbl }); + } // The `OffsetSlice` operation returns something of the same type as its input. // Afterward, we must cast the result to the `result_ty`/`result_desc`. diff --git a/c2rust-analyze/src/rewrite/mod.rs b/c2rust-analyze/src/rewrite/mod.rs index affebe42e..52b399653 100644 --- a/c2rust-analyze/src/rewrite/mod.rs +++ b/c2rust-analyze/src/rewrite/mod.rs @@ -98,6 +98,9 @@ pub enum Rewrite { /// Single-variable `let` binding. This has the same scoping issues as multi-variable `Let`; /// because of this, `Let` should generally be used instead of multiple `Let1`s. Let1(String, Box), + /// Single-argument closure. As with `Let` and `Let1`, the body must be carefully constructed + /// to avoid potential shadowing. + Closure1(String, Box), // Type builders /// Emit a complete pretty-printed type, discarding the original annotation. @@ -196,6 +199,7 @@ impl Rewrite { Let(new_vars) } Let1(ref name, ref rw) => Let1(String::clone(name), try_subst(rw)?), + Closure1(ref name, ref rw) => Closure1(String::clone(name), try_subst(rw)?), Print(ref s) => Print(String::clone(s)), TyPtr(ref rw, mutbl) => TyPtr(try_subst(rw)?, mutbl), diff --git a/c2rust-analyze/tests/filecheck.rs b/c2rust-analyze/tests/filecheck.rs index c33f1f322..e376e79c4 100644 --- a/c2rust-analyze/tests/filecheck.rs +++ b/c2rust-analyze/tests/filecheck.rs @@ -57,6 +57,7 @@ define_tests! { known_fn, non_null, non_null_force, + non_null_map, offset1, offset2, pointee, diff --git a/c2rust-analyze/tests/filecheck/non_null_map.rs b/c2rust-analyze/tests/filecheck/non_null_map.rs new file mode 100644 index 000000000..6b7234b32 --- /dev/null +++ b/c2rust-analyze/tests/filecheck/non_null_map.rs @@ -0,0 +1,15 @@ +use std::ptr; + +// CHECK-LABEL: unsafe fn f{{[<(]}} +pub unsafe fn f(cond: bool, p: *mut i32) { + let mut p = p; + if cond { + p = ptr::null_mut(); + } + + + // CHECK: let ([[arr:.+]], [[idx:.+]], ) = ((p), (3) as usize, ); + // CHECK-NEXT: [[arr]].map(|arr| &arr[[[idx]] ..]) + let q = p.offset(3); +} + From 06df31cf5820722fa0bc00fe5804e943e71a6bf6 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 3 May 2024 16:20:05 -0700 Subject: [PATCH 04/30] analyze: add OptionMapBegin/End rewrites --- c2rust-analyze/src/rewrite/expr/convert.rs | 12 +++++ c2rust-analyze/src/rewrite/expr/mir_op.rs | 45 +++++++++++++++---- .../tests/filecheck/non_null_map.rs | 2 +- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 4ea3b66f9..4c2355ad2 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -504,6 +504,18 @@ pub fn convert_cast_rewrite(kind: &mir_op::RewriteKind, hir_rw: Rewrite) -> Rewr Rewrite::Call("std::option::Option::Some".to_string(), vec![hir_rw]) } + mir_op::RewriteKind::OptionMapBegin => { + // `p` -> `p.unwrap()` + Rewrite::MethodCall("unwrap /*map_begin*/".to_string(), Box::new(hir_rw), vec![]) + } + mir_op::RewriteKind::OptionMapEnd => { + // `p` -> `Some(p)` + Rewrite::Call( + "std::option::Option::Some /*map_end*/".to_string(), + vec![hir_rw], + ) + } + mir_op::RewriteKind::CastRefToRaw { mutbl } => { // `addr_of!(*p)` is cleaner than `p as *const _`; we don't know the pointee // type here, so we can't emit `p as *const T`. diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 1cfcd42a7..4cda4a5b6 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -84,6 +84,16 @@ pub enum RewriteKind { OptionUnwrap, /// Convert `T` to `Option` by wrapping the value in `Some`. OptionSome, + /// Begin an `Option::map` operation, converting `Option` to `T`. + OptionMapBegin, + /// End an `Option::map` operation, converting `T` to `Option`. + /// + /// `OptionMapBegin` and `OptionMapEnd` could legally be implemented as aliases for + /// `OptionUnwrap` and `OptionSome` respectively. However, when `OptionMapBegin` and + /// `OptionMapEnd` are paired, we instead emit a call to `Option::map` with the intervening + /// rewrites applied within the closure. This has the same effect when the input is `Some`, + /// but passes through `None` unchanged instead of panicking. + OptionMapEnd, /// Cast `&T` to `*const T` or `&mut T` to `*mut T`. CastRefToRaw { mutbl: bool }, @@ -813,17 +823,31 @@ where return Ok(()); } + // TODO: If `from.option`, we need to use `.as_ref()`/`.as_mut()` in some cases to avoid + // moving `from`. Which one to use depends on the ownership of `from` and `to`. + + let mut in_option_map = false; if from.option && !to.option { // Unwrap first, then perform remaining casts. - // TODO: Need to use `.as_ref()`/`.as_mut()` in some cases to avoid moving `from`. - // Which one to use depends on the ownership of `from` and `to`. (self.emit)(RewriteKind::OptionUnwrap); from.option = false; - } - - if from.option && to.option { - // FIXME: mapping casts over an `Option` is not yet implemented - from.option = to.option; + } else if from.option && to.option { + eprintln!("try_build_cast_desc_desc: emit OptionMapBegin"); + if from.own != to.own { + eprintln!(" own differs: {:?} != {:?}", from.own, to.own); + } + if from.qty != to.qty { + eprintln!(" qty differs: {:?} != {:?}", from.qty, to.qty); + } + if from.pointee_ty != to.pointee_ty { + eprintln!( + " pointee_ty differs: {:?} != {:?}", + from.pointee_ty, to.pointee_ty + ); + } + (self.emit)(RewriteKind::OptionMapBegin); + from.option = false; + in_option_map = true; } // Early `Ownership` casts. We do certain casts here in hopes of reaching an `Ownership` @@ -880,7 +904,12 @@ where // Late `Ownership` casts. from.own = self.cast_ownership(from, to, false)?; - if !from.option && to.option { + if in_option_map { + assert!(!from.option); + assert!(to.option); + (self.emit)(RewriteKind::OptionMapEnd); + from.option = true; + } else if !from.option && to.option { // Wrap at the end, after performing all other steps of the cast. (self.emit)(RewriteKind::OptionSome); from.option = true; diff --git a/c2rust-analyze/tests/filecheck/non_null_map.rs b/c2rust-analyze/tests/filecheck/non_null_map.rs index 6b7234b32..9761db6d0 100644 --- a/c2rust-analyze/tests/filecheck/non_null_map.rs +++ b/c2rust-analyze/tests/filecheck/non_null_map.rs @@ -8,7 +8,7 @@ pub unsafe fn f(cond: bool, p: *mut i32) { } - // CHECK: let ([[arr:.+]], [[idx:.+]], ) = ((p), (3) as usize, ); + // CHECK: let ([[arr:.+]], [[idx:.+]], ) = ({{.*}}(p){{.*}}, (3) as usize, ); // CHECK-NEXT: [[arr]].map(|arr| &arr[[[idx]] ..]) let q = p.offset(3); } From 4cf70171b29a212f16dad9888467ab1e58936661 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 3 May 2024 16:55:03 -0700 Subject: [PATCH 05/30] analyze: rename tests/filecheck/non_null_{map->rewrite}.rs --- c2rust-analyze/tests/filecheck.rs | 2 +- .../tests/filecheck/{non_null_map.rs => non_null_rewrites.rs} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename c2rust-analyze/tests/filecheck/{non_null_map.rs => non_null_rewrites.rs} (100%) diff --git a/c2rust-analyze/tests/filecheck.rs b/c2rust-analyze/tests/filecheck.rs index e376e79c4..94672a386 100644 --- a/c2rust-analyze/tests/filecheck.rs +++ b/c2rust-analyze/tests/filecheck.rs @@ -57,7 +57,7 @@ define_tests! { known_fn, non_null, non_null_force, - non_null_map, + non_null_rewrites, offset1, offset2, pointee, diff --git a/c2rust-analyze/tests/filecheck/non_null_map.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs similarity index 100% rename from c2rust-analyze/tests/filecheck/non_null_map.rs rename to c2rust-analyze/tests/filecheck/non_null_rewrites.rs From 41844bb7dbcb093420fc6286b9b77bcd92cc6177 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 3 May 2024 16:57:35 -0700 Subject: [PATCH 06/30] analyze: implement Option::as_ref/as_mut ownership rewrites --- c2rust-analyze/src/rewrite/expr/convert.rs | 19 ++++++++++ c2rust-analyze/src/rewrite/expr/mir_op.rs | 35 +++++++++++++++++-- .../tests/filecheck/non_null_rewrites.rs | 27 ++++++++++++++ 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 4c2355ad2..f71284b6f 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -516,6 +516,25 @@ pub fn convert_cast_rewrite(kind: &mir_op::RewriteKind, hir_rw: Rewrite) -> Rewr ) } + mir_op::RewriteKind::OptionDowngrade { mutbl, deref } => { + // `p` -> `Some(p)` + let ref_method = if mutbl { + "as_mut".into() + } else { + "as_ref".into() + }; + let mut hir_rw = Rewrite::MethodCall(ref_method, Box::new(hir_rw), vec![]); + if deref { + let deref_method = if mutbl { + "as_deref_mut".into() + } else { + "as_deref".into() + }; + hir_rw = Rewrite::MethodCall(deref_method, Box::new(hir_rw), vec![]); + } + hir_rw + } + mir_op::RewriteKind::CastRefToRaw { mutbl } => { // `addr_of!(*p)` is cleaner than `p as *const _`; we don't know the pointee // type here, so we can't emit `p as *const T`. diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 4cda4a5b6..158bc9855 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -94,6 +94,9 @@ pub enum RewriteKind { /// rewrites applied within the closure. This has the same effect when the input is `Some`, /// but passes through `None` unchanged instead of panicking. OptionMapEnd, + /// Downgrade ownership of an `Option` to `Option<&_>` or `Option<&mut _>` by calling + /// `as_ref()`/`as_mut()` and optionally `as_deref()`/`as_deref_mut()`. + OptionDowngrade { mutbl: bool, deref: bool }, /// Cast `&T` to `*const T` or `&mut T` to `*mut T`. CastRefToRaw { mutbl: bool }, @@ -823,8 +826,36 @@ where return Ok(()); } - // TODO: If `from.option`, we need to use `.as_ref()`/`.as_mut()` in some cases to avoid - // moving `from`. Which one to use depends on the ownership of `from` and `to`. + if from.option && from.own != to.own { + // Downgrade ownership before unwrapping the `Option` when possible. This can avoid + // moving/consuming the input. For example, if the `from` type is `Option>` and + // `to` is `&mut T`, we start by calling `p.as_mut().as_deref()`, which gives + // `Option<&mut T>` without consuming `p`. + match from.own { + Ownership::Raw | Ownership::RawMut | Ownership::Imm | Ownership::Cell => { + // No-op. The `from` type is `Copy`, so we can unwrap it without consequence. + } + Ownership::Mut | Ownership::Rc | Ownership::Box => match to.own { + Ownership::Raw | Ownership::Imm => { + (self.emit)(RewriteKind::OptionDowngrade { + mutbl: false, + deref: true, + }); + from.own = Ownership::Imm; + } + Ownership::RawMut | Ownership::Cell | Ownership::Mut => { + (self.emit)(RewriteKind::OptionDowngrade { + mutbl: true, + deref: true, + }); + from.own = Ownership::Mut; + } + _ => { + // Remaining cases are unsupported. + } + }, + } + } let mut in_option_map = false; if from.option && !to.option { diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index 9761db6d0..8d6cf2109 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -13,3 +13,30 @@ pub unsafe fn f(cond: bool, p: *mut i32) { let q = p.offset(3); } + +// CHECK-LABEL: unsafe fn call_use_mut( +unsafe fn call_use_mut(cond: bool) -> i32 { + let mut x = 1; + let p = if cond { + + ptr::addr_of_mut!(x) + } else { + ptr::null_mut() + }; + use_mut(p) +} + +// CHECK-LABEL: unsafe fn use_mut{{[<(]}} +unsafe fn use_mut(p: *mut i32) -> i32 { + if !p.is_null() { + *p = 1; + } + // CHECK: use_const + // CHECK-SAME: (p).as_ref().as_deref() + use_const(p) +} + +// CHECK-LABEL: unsafe fn use_const{{[<(]}} +unsafe fn use_const(p: *const i32) -> i32 { + *p +} From 85e0cc1f5567dffe9202c2902c67b02f405d6a75 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 6 May 2024 10:58:12 -0700 Subject: [PATCH 07/30] analyze: refactor: new rewrite_from_mir_rws helper in rewrite::expr::convert --- c2rust-analyze/src/rewrite/expr/convert.rs | 31 +++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index f71284b6f..5a69aacc5 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -146,7 +146,7 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { intravisit::walk_expr(this, ex); }); - let rewrite_from_mir_rws = |rw: &mir_op::RewriteKind, hir_rw: Rewrite| -> Rewrite { + let rewrite_from_mir_rw = |rw: &mir_op::RewriteKind, hir_rw: Rewrite| -> Rewrite { // Cases that extract a subexpression are handled here; cases that only wrap the // top-level expression (and thus can handle a non-`Identity` `hir_rw`) are handled by // `convert_cast_rewrite`. @@ -305,14 +305,19 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { } }; + let rewrite_from_mir_rws = |mir_rws: &[DistRewrite], mut hir_rw: Rewrite| -> Rewrite { + for mir_rw in mir_rws { + hir_rw = rewrite_from_mir_rw(&mir_rw.rw, hir_rw); + } + hir_rw + }; + // Apply rewrites on the expression itself. These will be the first rewrites in the sorted // list produced by `distribute`. let expr_rws = take_prefix_while(&mut mir_rws, |x: &DistRewrite| { matches!(x.desc, MirOriginDesc::Expr) }); - for mir_rw in expr_rws { - hir_rw = rewrite_from_mir_rws(&mir_rw.rw, hir_rw); - } + hir_rw = rewrite_from_mir_rws(expr_rws, hir_rw); // Materialize adjustments if requested by an ancestor or required locally. let has_adjustment_rewrites = mir_rws @@ -320,25 +325,21 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { .any(|x| matches!(x.desc, MirOriginDesc::Adjustment(_))); if self.materialize_adjustments || has_adjustment_rewrites { let adjusts = self.typeck_results.expr_adjustments(ex); - hir_rw = materialize_adjustments(self.tcx, adjusts, hir_rw, |i, mut hir_rw| { + hir_rw = materialize_adjustments(self.tcx, adjusts, hir_rw, |i, hir_rw| { let adj_rws = take_prefix_while(&mut mir_rws, |x| x.desc == MirOriginDesc::Adjustment(i)); - for mir_rw in adj_rws { - eprintln!("would apply {mir_rw:?} for adjustment #{i}, over {hir_rw:?}"); - hir_rw = rewrite_from_mir_rws(&mir_rw.rw, hir_rw); - } - hir_rw + rewrite_from_mir_rws(adj_rws, hir_rw) }); } // Apply late rewrites. - for mir_rw in mir_rws { - assert!(matches!( + assert!(mir_rws.iter().all(|mir_rw| { + matches!( mir_rw.desc, MirOriginDesc::StoreIntoLocal | MirOriginDesc::LoadFromTemp - )); - hir_rw = rewrite_from_mir_rws(&mir_rw.rw, hir_rw); - } + ) + })); + hir_rw = rewrite_from_mir_rws(mir_rws, hir_rw); if !matches!(hir_rw, Rewrite::Identity) { eprintln!( From da9bfcc09a08ee8abaf63c76de35279caf141d6e Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 6 May 2024 11:03:02 -0700 Subject: [PATCH 08/30] analyze: refactor: turn rewrite_from_mir_rw closures into methods --- c2rust-analyze/src/rewrite/expr/convert.rs | 343 +++++++++++---------- 1 file changed, 174 insertions(+), 169 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 5a69aacc5..7296424a7 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -119,6 +119,177 @@ impl<'tcx> ConvertVisitor<'tcx> { } rw_sub } + + fn rewrite_from_mir_rw( + &self, + ex: &'tcx hir::Expr<'tcx>, + rw: &mir_op::RewriteKind, + hir_rw: Rewrite, + ) -> Rewrite { + // Cases that extract a subexpression are handled here; cases that only wrap the + // top-level expression (and thus can handle a non-`Identity` `hir_rw`) are handled by + // `convert_cast_rewrite`. + match *rw { + mir_op::RewriteKind::OffsetSlice { mutbl } => { + // `p.offset(i)` -> `&p[i as usize ..]` + assert!(matches!(hir_rw, Rewrite::Identity)); + let arr = self.get_subexpr(ex, 0); + let idx = Rewrite::Cast( + Box::new(self.get_subexpr(ex, 1)), + Box::new(Rewrite::Print("usize".to_owned())), + ); + let elem = Rewrite::SliceRange(Box::new(arr), Some(Box::new(idx)), None); + Rewrite::Ref(Box::new(elem), mutbl_from_bool(mutbl)) + } + + mir_op::RewriteKind::OptionMapOffsetSlice { mutbl } => { + // `p.offset(i)` -> `p.as_ref().map(|p| &p[i as usize ..])` + assert!(matches!(hir_rw, Rewrite::Identity)); + + // Build let binding + let arr = self.get_subexpr(ex, 0); + let idx = Rewrite::Cast( + Box::new(self.get_subexpr(ex, 1)), + Box::new(Rewrite::Print("usize".to_owned())), + ); + let rw_let = Rewrite::Let(vec![("arr".into(), arr), ("idx".into(), idx)]); + let arr = Rewrite::Text("arr".into()); + let idx = Rewrite::Text("idx".into()); + + // Build closure + let elem = Rewrite::SliceRange(Box::new(arr.clone()), Some(Box::new(idx)), None); + let ref_elem = Rewrite::Ref(Box::new(elem), mutbl_from_bool(mutbl)); + let closure = Rewrite::Closure1("arr".into(), Box::new(ref_elem)); + + // Call `Option::map` + let call = Rewrite::MethodCall("map".into(), Box::new(arr), vec![closure]); + Rewrite::Block(vec![rw_let], Some(Box::new(call))) + } + + mir_op::RewriteKind::RemoveAsPtr => { + // `slice.as_ptr()` -> `slice` + assert!(matches!(hir_rw, Rewrite::Identity)); + self.get_subexpr(ex, 0) + } + + mir_op::RewriteKind::RemoveCast => { + // `x as T` -> `x` + match hir_rw { + Rewrite::Identity => { + assert!(matches!(hir_rw, Rewrite::Identity)); + self.get_subexpr(ex, 0) + } + // Can happen when attempting to delete a cast adjustment. + Rewrite::Cast(rw, _) => *rw, + Rewrite::RemovedCast(rw) => *rw, + _ => panic!("unexpected hir_rw {hir_rw:?} for RawToRef"), + } + } + + mir_op::RewriteKind::RawToRef { mutbl } => { + // &raw _ to &_ or &raw mut _ to &mut _ + match hir_rw { + Rewrite::Identity => { + Rewrite::Ref(Box::new(self.get_subexpr(ex, 0)), mutbl_from_bool(mutbl)) + } + Rewrite::AddrOf(rw, mutbl) => Rewrite::Ref(rw, mutbl), + _ => panic!("unexpected hir_rw {hir_rw:?} for RawToRef"), + } + } + + mir_op::RewriteKind::MemcpySafe { + elem_size, + dest_single, + src_single, + } => { + // `memcpy(dest, src, n)` to a `copy_from_slice` call + assert!(matches!(hir_rw, Rewrite::Identity)); + assert!(!dest_single, "&T -> &[T] conversion for memcpy dest NYI"); + assert!(!src_single, "&T -> &[T] conversion for memcpy src NYI"); + Rewrite::Block( + vec![ + Rewrite::Let(vec![ + ("dest".into(), self.get_subexpr(ex, 0)), + ("src".into(), self.get_subexpr(ex, 1)), + ("byte_len".into(), self.get_subexpr(ex, 2)), + ]), + Rewrite::Let(vec![( + "n".into(), + format_rewrite!("byte_len as usize / {elem_size}"), + )]), + Rewrite::MethodCall( + "copy_from_slice".into(), + Box::new(format_rewrite!("dest[..n]")), + vec![format_rewrite!("&src[..n]")], + ), + ], + Some(Box::new(format_rewrite!("dest"))), + ) + } + + mir_op::RewriteKind::MemsetZeroize { + ref zero_ty, + elem_size, + dest_single, + } => { + // `memcpy(dest, src, n)` to a `copy_from_slice` call + assert!(matches!(hir_rw, Rewrite::Identity)); + let zeroize_body = if dest_single { + Rewrite::Text(generate_zeroize_code(zero_ty, "(*dest)")) + } else { + format_rewrite!( + "for i in 0..n {{\n {};\n}}", + generate_zeroize_code(zero_ty, "(*dest)[i]") + ) + }; + Rewrite::Block( + vec![ + Rewrite::Let(vec![ + ("dest".into(), self.get_subexpr(ex, 0)), + ("val".into(), self.get_subexpr(ex, 1)), + ("byte_len".into(), self.get_subexpr(ex, 2)), + ]), + Rewrite::Let(vec![( + "n".into(), + format_rewrite!("byte_len as usize / {elem_size}"), + )]), + format_rewrite!("assert_eq!(val, 0, \"non-zero memset NYI\")"), + zeroize_body, + ], + Some(Box::new(format_rewrite!("dest"))), + ) + } + + mir_op::RewriteKind::CellGet => { + // `*x` to `Cell::get(x)` + assert!(matches!(hir_rw, Rewrite::Identity)); + Rewrite::MethodCall("get".to_string(), Box::new(self.get_subexpr(ex, 0)), vec![]) + } + + mir_op::RewriteKind::CellSet => { + // `*x` to `Cell::set(x)` + assert!(matches!(hir_rw, Rewrite::Identity)); + let deref_lhs = assert_matches!(ex.kind, ExprKind::Assign(lhs, ..) => lhs); + let lhs = self.get_subexpr(deref_lhs, 0); + let rhs = self.get_subexpr(ex, 1); + Rewrite::MethodCall("set".to_string(), Box::new(lhs), vec![rhs]) + } + + _ => convert_cast_rewrite(rw, hir_rw), + } + } + + fn rewrite_from_mir_rws( + &self, + ex: &'tcx hir::Expr<'tcx>, + mir_rws: &[DistRewrite], + mut hir_rw: Rewrite, + ) -> Rewrite { + for mir_rw in mir_rws { + hir_rw = self.rewrite_from_mir_rw(ex, &mir_rw.rw, hir_rw); + } + hir_rw + } } impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { @@ -146,178 +317,12 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { intravisit::walk_expr(this, ex); }); - let rewrite_from_mir_rw = |rw: &mir_op::RewriteKind, hir_rw: Rewrite| -> Rewrite { - // Cases that extract a subexpression are handled here; cases that only wrap the - // top-level expression (and thus can handle a non-`Identity` `hir_rw`) are handled by - // `convert_cast_rewrite`. - match *rw { - mir_op::RewriteKind::OffsetSlice { mutbl } => { - // `p.offset(i)` -> `&p[i as usize ..]` - assert!(matches!(hir_rw, Rewrite::Identity)); - let arr = self.get_subexpr(ex, 0); - let idx = Rewrite::Cast( - Box::new(self.get_subexpr(ex, 1)), - Box::new(Rewrite::Print("usize".to_owned())), - ); - let elem = Rewrite::SliceRange(Box::new(arr), Some(Box::new(idx)), None); - Rewrite::Ref(Box::new(elem), mutbl_from_bool(mutbl)) - } - - mir_op::RewriteKind::OptionMapOffsetSlice { mutbl } => { - // `p.offset(i)` -> `p.as_ref().map(|p| &p[i as usize ..])` - assert!(matches!(hir_rw, Rewrite::Identity)); - - // Build let binding - let arr = self.get_subexpr(ex, 0); - let idx = Rewrite::Cast( - Box::new(self.get_subexpr(ex, 1)), - Box::new(Rewrite::Print("usize".to_owned())), - ); - let rw_let = Rewrite::Let(vec![("arr".into(), arr), ("idx".into(), idx)]); - let arr = Rewrite::Text("arr".into()); - let idx = Rewrite::Text("idx".into()); - - // Build closure - let elem = - Rewrite::SliceRange(Box::new(arr.clone()), Some(Box::new(idx)), None); - let ref_elem = Rewrite::Ref(Box::new(elem), mutbl_from_bool(mutbl)); - let closure = Rewrite::Closure1("arr".into(), Box::new(ref_elem)); - - // Call `Option::map` - let call = Rewrite::MethodCall("map".into(), Box::new(arr), vec![closure]); - Rewrite::Block(vec![rw_let], Some(Box::new(call))) - } - - mir_op::RewriteKind::RemoveAsPtr => { - // `slice.as_ptr()` -> `slice` - assert!(matches!(hir_rw, Rewrite::Identity)); - self.get_subexpr(ex, 0) - } - - mir_op::RewriteKind::RemoveCast => { - // `x as T` -> `x` - match hir_rw { - Rewrite::Identity => { - assert!(matches!(hir_rw, Rewrite::Identity)); - self.get_subexpr(ex, 0) - } - // Can happen when attempting to delete a cast adjustment. - Rewrite::Cast(rw, _) => *rw, - Rewrite::RemovedCast(rw) => *rw, - _ => panic!("unexpected hir_rw {hir_rw:?} for RawToRef"), - } - } - - mir_op::RewriteKind::RawToRef { mutbl } => { - // &raw _ to &_ or &raw mut _ to &mut _ - match hir_rw { - Rewrite::Identity => { - Rewrite::Ref(Box::new(self.get_subexpr(ex, 0)), mutbl_from_bool(mutbl)) - } - Rewrite::AddrOf(rw, mutbl) => Rewrite::Ref(rw, mutbl), - _ => panic!("unexpected hir_rw {hir_rw:?} for RawToRef"), - } - } - - mir_op::RewriteKind::MemcpySafe { - elem_size, - dest_single, - src_single, - } => { - // `memcpy(dest, src, n)` to a `copy_from_slice` call - assert!(matches!(hir_rw, Rewrite::Identity)); - assert!(!dest_single, "&T -> &[T] conversion for memcpy dest NYI"); - assert!(!src_single, "&T -> &[T] conversion for memcpy src NYI"); - Rewrite::Block( - vec![ - Rewrite::Let(vec![ - ("dest".into(), self.get_subexpr(ex, 0)), - ("src".into(), self.get_subexpr(ex, 1)), - ("byte_len".into(), self.get_subexpr(ex, 2)), - ]), - Rewrite::Let(vec![( - "n".into(), - format_rewrite!("byte_len as usize / {elem_size}"), - )]), - Rewrite::MethodCall( - "copy_from_slice".into(), - Box::new(format_rewrite!("dest[..n]")), - vec![format_rewrite!("&src[..n]")], - ), - ], - Some(Box::new(format_rewrite!("dest"))), - ) - } - - mir_op::RewriteKind::MemsetZeroize { - ref zero_ty, - elem_size, - dest_single, - } => { - // `memcpy(dest, src, n)` to a `copy_from_slice` call - assert!(matches!(hir_rw, Rewrite::Identity)); - let zeroize_body = if dest_single { - Rewrite::Text(generate_zeroize_code(zero_ty, "(*dest)")) - } else { - format_rewrite!( - "for i in 0..n {{\n {};\n}}", - generate_zeroize_code(zero_ty, "(*dest)[i]") - ) - }; - Rewrite::Block( - vec![ - Rewrite::Let(vec![ - ("dest".into(), self.get_subexpr(ex, 0)), - ("val".into(), self.get_subexpr(ex, 1)), - ("byte_len".into(), self.get_subexpr(ex, 2)), - ]), - Rewrite::Let(vec![( - "n".into(), - format_rewrite!("byte_len as usize / {elem_size}"), - )]), - format_rewrite!("assert_eq!(val, 0, \"non-zero memset NYI\")"), - zeroize_body, - ], - Some(Box::new(format_rewrite!("dest"))), - ) - } - - mir_op::RewriteKind::CellGet => { - // `*x` to `Cell::get(x)` - assert!(matches!(hir_rw, Rewrite::Identity)); - Rewrite::MethodCall( - "get".to_string(), - Box::new(self.get_subexpr(ex, 0)), - vec![], - ) - } - - mir_op::RewriteKind::CellSet => { - // `*x` to `Cell::set(x)` - assert!(matches!(hir_rw, Rewrite::Identity)); - let deref_lhs = assert_matches!(ex.kind, ExprKind::Assign(lhs, ..) => lhs); - let lhs = self.get_subexpr(deref_lhs, 0); - let rhs = self.get_subexpr(ex, 1); - Rewrite::MethodCall("set".to_string(), Box::new(lhs), vec![rhs]) - } - - _ => convert_cast_rewrite(rw, hir_rw), - } - }; - - let rewrite_from_mir_rws = |mir_rws: &[DistRewrite], mut hir_rw: Rewrite| -> Rewrite { - for mir_rw in mir_rws { - hir_rw = rewrite_from_mir_rw(&mir_rw.rw, hir_rw); - } - hir_rw - }; - // Apply rewrites on the expression itself. These will be the first rewrites in the sorted // list produced by `distribute`. let expr_rws = take_prefix_while(&mut mir_rws, |x: &DistRewrite| { matches!(x.desc, MirOriginDesc::Expr) }); - hir_rw = rewrite_from_mir_rws(expr_rws, hir_rw); + hir_rw = self.rewrite_from_mir_rws(ex, expr_rws, hir_rw); // Materialize adjustments if requested by an ancestor or required locally. let has_adjustment_rewrites = mir_rws @@ -328,7 +333,7 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { hir_rw = materialize_adjustments(self.tcx, adjusts, hir_rw, |i, hir_rw| { let adj_rws = take_prefix_while(&mut mir_rws, |x| x.desc == MirOriginDesc::Adjustment(i)); - rewrite_from_mir_rws(adj_rws, hir_rw) + self.rewrite_from_mir_rws(ex, adj_rws, hir_rw) }); } @@ -339,7 +344,7 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { MirOriginDesc::StoreIntoLocal | MirOriginDesc::LoadFromTemp ) })); - hir_rw = rewrite_from_mir_rws(mir_rws, hir_rw); + hir_rw = self.rewrite_from_mir_rws(ex, mir_rws, hir_rw); if !matches!(hir_rw, Rewrite::Identity) { eprintln!( From 07c4906a415324132fca4a07a74212d595bfdcac Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 6 May 2024 11:21:08 -0700 Subject: [PATCH 09/30] analyze: refactor: make Expr optional in rewrite_from_mir_rw --- c2rust-analyze/src/rewrite/expr/convert.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 7296424a7..f0ceec79f 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -122,10 +122,15 @@ impl<'tcx> ConvertVisitor<'tcx> { fn rewrite_from_mir_rw( &self, - ex: &'tcx hir::Expr<'tcx>, + ex: Option<&'tcx hir::Expr<'tcx>>, rw: &mir_op::RewriteKind, hir_rw: Rewrite, ) -> Rewrite { + if ex.is_none() { + return convert_cast_rewrite(rw, hir_rw); + } + let ex = ex.unwrap(); + // Cases that extract a subexpression are handled here; cases that only wrap the // top-level expression (and thus can handle a non-`Identity` `hir_rw`) are handled by // `convert_cast_rewrite`. @@ -281,7 +286,7 @@ impl<'tcx> ConvertVisitor<'tcx> { fn rewrite_from_mir_rws( &self, - ex: &'tcx hir::Expr<'tcx>, + ex: Option<&'tcx hir::Expr<'tcx>>, mir_rws: &[DistRewrite], mut hir_rw: Rewrite, ) -> Rewrite { @@ -322,7 +327,7 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { let expr_rws = take_prefix_while(&mut mir_rws, |x: &DistRewrite| { matches!(x.desc, MirOriginDesc::Expr) }); - hir_rw = self.rewrite_from_mir_rws(ex, expr_rws, hir_rw); + hir_rw = self.rewrite_from_mir_rws(Some(ex), expr_rws, hir_rw); // Materialize adjustments if requested by an ancestor or required locally. let has_adjustment_rewrites = mir_rws @@ -333,7 +338,7 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { hir_rw = materialize_adjustments(self.tcx, adjusts, hir_rw, |i, hir_rw| { let adj_rws = take_prefix_while(&mut mir_rws, |x| x.desc == MirOriginDesc::Adjustment(i)); - self.rewrite_from_mir_rws(ex, adj_rws, hir_rw) + self.rewrite_from_mir_rws(Some(ex), adj_rws, hir_rw) }); } @@ -344,7 +349,7 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { MirOriginDesc::StoreIntoLocal | MirOriginDesc::LoadFromTemp ) })); - hir_rw = self.rewrite_from_mir_rws(ex, mir_rws, hir_rw); + hir_rw = self.rewrite_from_mir_rws(Some(ex), mir_rws, hir_rw); if !matches!(hir_rw, Rewrite::Identity) { eprintln!( From e7fd66aee6cfc5ea58bc430fe0e6ac0d3d68f388 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 6 May 2024 11:49:11 -0700 Subject: [PATCH 10/30] analyze: implement Option::map rewriting --- c2rust-analyze/src/rewrite/expr/convert.rs | 83 ++++++++++++++++++- .../tests/filecheck/non_null_rewrites.rs | 26 +++++- 2 files changed, 107 insertions(+), 2 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index f0ceec79f..23c880e81 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -284,13 +284,94 @@ impl<'tcx> ConvertVisitor<'tcx> { } } + /// Generate an `Option::map` call from the rewrites in `mir_rws`. After seeing an + /// `OptionMapBegin` in a list of MIR rewrites, pass the remaining rewrites to this method. If + /// it returns `Ok((new_hir_rw, remaining_mir_rws))`, then the `OptionMapBegin` and some + /// additional rewrites have been processed, and only `remaining_mir_rws` are left. Otherwise, + /// if it failed to process any rewrites, it returns `Err(original_hir_rw)`. + fn try_rewrite_option_map<'a>( + &self, + mut mir_rws: &'a [DistRewrite], + mut hir_rw: Rewrite, + ) -> Result<(Rewrite, &'a [DistRewrite]), Rewrite> { + /// Find the next `OptionMapEnd` and return the parts of `mir_rws` that come strictly + /// before it and strictly after it. Returns `None` if there's an `OptionMapBegin` before + /// the next `OptionMapEnd`, or if there's no `OptionMapEnd` found in `mir_rws`. + fn split_option_map_rewrites( + mir_rws: &[DistRewrite], + ) -> Option<(&[DistRewrite], &[DistRewrite])> { + for (i, mir_rw) in mir_rws.iter().enumerate() { + match mir_rw.rw { + // Bail out if we see nested delimiters. + mir_op::RewriteKind::OptionMapBegin => return None, + mir_op::RewriteKind::OptionMapEnd => { + let (a, b) = mir_rws.split_at(i); + return Some((a, &b[1..])); + } + _ => {} + } + } + None + } + + // Build up the body of the closure. We expect `mir_rws` to start just after an + // `OptionMapBegin` delimiter. If we find a matching `OptionMapEnd` (with no intervening + // `OptionMapBegin`; nesting is unsupported), then we add all the rewrites between the + // delimiters to the `Option::map` body. Furthermore, as long as the `OptionMapEnd` is + // followed by another `OptionMapBegin/End` delimited range, we add those rewrites to the + // body as well. + let mut opt_body_hir_rw = None; + // Did we find a matching delimiter for the implicit `OptionMapBegin` that precedes + // `mir_rws`? + let mut found_matching_delim = false; + while let Some((body_rws, other_rws)) = split_option_map_rewrites(mir_rws) { + found_matching_delim = true; + mir_rws = other_rws; + if body_rws.is_empty() { + // Don't initialize `opt_body_hir_rw` until we've seen at least one actual rewrite. + // This lets us omit empty `Option::map` calls. + continue; + } + + let mut body_hir_rw = opt_body_hir_rw.unwrap_or_else(|| Rewrite::Text("__ptr".into())); + body_hir_rw = self.rewrite_from_mir_rws(None, body_rws, body_hir_rw); + + opt_body_hir_rw = Some(body_hir_rw); + } + + if !found_matching_delim { + return Err(hir_rw); + } + + // If some actual rewrites were collected, generate the `map` call. + if let Some(body) = opt_body_hir_rw { + let closure = Rewrite::Closure1("__ptr".into(), Box::new(body)); + hir_rw = Rewrite::MethodCall("map".into(), Box::new(hir_rw), vec![closure]); + } + return Ok((hir_rw, mir_rws)); + } + fn rewrite_from_mir_rws( &self, ex: Option<&'tcx hir::Expr<'tcx>>, mir_rws: &[DistRewrite], mut hir_rw: Rewrite, ) -> Rewrite { - for mir_rw in mir_rws { + let mut iter = mir_rws.iter(); + while let Some(mir_rw) = iter.next() { + if let mir_op::RewriteKind::OptionMapBegin = mir_rw.rw { + match self.try_rewrite_option_map(iter.as_slice(), hir_rw) { + Ok((new_hir_rw, remaining_mir_rws)) => { + hir_rw = new_hir_rw; + iter = remaining_mir_rws.iter(); + continue; + } + Err(orig_hir_rw) => { + hir_rw = orig_hir_rw; + } + } + } + hir_rw = self.rewrite_from_mir_rw(ex, &mir_rw.rw, hir_rw); } hir_rw diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index 8d6cf2109..a5da691dd 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -8,7 +8,7 @@ pub unsafe fn f(cond: bool, p: *mut i32) { } - // CHECK: let ([[arr:.+]], [[idx:.+]], ) = ({{.*}}(p){{.*}}, (3) as usize, ); + // CHECK: let ([[arr:.+]], [[idx:.+]], ) = ((p), (3) as usize, ); // CHECK-NEXT: [[arr]].map(|arr| &arr[[[idx]] ..]) let q = p.offset(3); } @@ -40,3 +40,27 @@ unsafe fn use_mut(p: *mut i32) -> i32 { unsafe fn use_const(p: *const i32) -> i32 { *p } + +// CHECK-LABEL: unsafe fn call_use_slice{{[<(]}} +unsafe fn call_use_slice(cond: bool, q: *const i32) -> i32 { + let p = if cond { + q + } else { + ptr::null_mut() + }; + use_slice(p) +} + +// CHECK-LABEL: unsafe fn use_slice{{[<(]}} +unsafe fn use_slice(p: *const i32) -> i32 { + if !p.is_null() { + let x = *p.offset(1); + } + // CHECK: use_single((p).map(|__ptr| &__ptr[0])) + use_single(p) +} + +// CHECK-LABEL: unsafe fn use_single{{[<(]}} +unsafe fn use_single(p: *const i32) -> i32 { + *p +} From 56a3062568e7f6cf44c164b07d028a80b782cd1c Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 6 May 2024 13:38:55 -0700 Subject: [PATCH 11/30] analyze: rewrite: replace p.is_null() with p.is_none() --- c2rust-analyze/src/rewrite/expr/convert.rs | 11 ++++++++++ c2rust-analyze/src/rewrite/expr/mir_op.rs | 21 +++++++++++++++++++ .../tests/filecheck/non_null_rewrites.rs | 7 +++++++ 3 files changed, 39 insertions(+) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 23c880e81..fb1a4924d 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -202,6 +202,17 @@ impl<'tcx> ConvertVisitor<'tcx> { } } + mir_op::RewriteKind::IsNullToIsNone => { + // `p.is_null()` -> `p.is_none()` + assert!(matches!(hir_rw, Rewrite::Identity)); + Rewrite::MethodCall("is_none".into(), Box::new(self.get_subexpr(ex, 0)), vec![]) + } + mir_op::RewriteKind::IsNullToConstFalse => { + // `p.is_null()` -> `false` + assert!(matches!(hir_rw, Rewrite::Identity)); + Rewrite::Text("false".into()) + } + mir_op::RewriteKind::MemcpySafe { elem_size, dest_single, diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 158bc9855..1b117d6c2 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -62,6 +62,12 @@ pub enum RewriteKind { /// Replace &raw with & or &raw mut with &mut RawToRef { mutbl: bool }, + /// Replace `ptr.is_null()` with `ptr.is_none()`. + IsNullToIsNone, + /// Replace `ptr.is_null()` with the constant `false`. We use this in cases where the rewritten + /// type of `ptr` is non-optional because we inferred `ptr` to be non-nullable. + IsNullToConstFalse, + /// Replace a call to `memcpy(dest, src, n)` with a safe copy operation that works on slices /// instead of raw pointers. `elem_size` is the size of the original, unrewritten pointee /// type, which is used to convert the byte length `n` to an element count. `dest_single` and @@ -481,6 +487,21 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { }); } + Callee::IsNull => { + self.enter_rvalue(|v| { + let arg_lty = v.acx.type_of(&args[0]); + if !v.flags[arg_lty.label].contains(FlagSet::FIXED) { + let arg_non_null = + v.perms[arg_lty.label].contains(PermissionSet::NON_NULL); + if arg_non_null { + v.emit(RewriteKind::IsNullToConstFalse); + } else { + v.emit(RewriteKind::IsNullToIsNone); + } + } + }); + } + _ => {} } } diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index a5da691dd..5ad0cb541 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -43,6 +43,11 @@ unsafe fn use_const(p: *const i32) -> i32 { // CHECK-LABEL: unsafe fn call_use_slice{{[<(]}} unsafe fn call_use_slice(cond: bool, q: *const i32) -> i32 { + // `q` is not nullable, so `q.is_null()` should be rewritten to `false`. + // CHECK: if !false { + if !q.is_null() { + // No-op + } let p = if cond { q } else { @@ -53,6 +58,8 @@ unsafe fn call_use_slice(cond: bool, q: *const i32) -> i32 { // CHECK-LABEL: unsafe fn use_slice{{[<(]}} unsafe fn use_slice(p: *const i32) -> i32 { + // `p`'s new type is `Option<&[i32]>`, so `is_null()` should become `is_none()`. + // CHECK: .is_none() if !p.is_null() { let x = *p.offset(1); } From 84332a4fd1a1dd50b56c346071f4ddb153148ca1 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 6 May 2024 13:49:33 -0700 Subject: [PATCH 12/30] analyze: rewrite: fix option.as_ref().as_deref() -> option.as_deref() --- c2rust-analyze/src/rewrite/expr/convert.rs | 23 +++++++++---------- .../tests/filecheck/non_null_rewrites.rs | 2 +- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index fb1a4924d..0ed70fcf0 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -621,21 +621,20 @@ pub fn convert_cast_rewrite(kind: &mir_op::RewriteKind, hir_rw: Rewrite) -> Rewr mir_op::RewriteKind::OptionDowngrade { mutbl, deref } => { // `p` -> `Some(p)` - let ref_method = if mutbl { - "as_mut".into() - } else { - "as_ref".into() - }; - let mut hir_rw = Rewrite::MethodCall(ref_method, Box::new(hir_rw), vec![]); - if deref { - let deref_method = if mutbl { + let ref_method = if deref { + if mutbl { "as_deref_mut".into() } else { "as_deref".into() - }; - hir_rw = Rewrite::MethodCall(deref_method, Box::new(hir_rw), vec![]); - } - hir_rw + } + } else { + if mutbl { + "as_mut".into() + } else { + "as_ref".into() + } + }; + Rewrite::MethodCall(ref_method, Box::new(hir_rw), vec![]) } mir_op::RewriteKind::CastRefToRaw { mutbl } => { diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index 5ad0cb541..6351ddc07 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -32,7 +32,7 @@ unsafe fn use_mut(p: *mut i32) -> i32 { *p = 1; } // CHECK: use_const - // CHECK-SAME: (p).as_ref().as_deref() + // CHECK-SAME: (p).as_deref() use_const(p) } From 53e847637d7c7716deacad2a102e2d2a62a95715 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 6 May 2024 13:49:48 -0700 Subject: [PATCH 13/30] analyze: rewrite ptr::null() to None --- c2rust-analyze/src/rewrite/expr/convert.rs | 5 +++++ c2rust-analyze/src/rewrite/expr/mir_op.rs | 16 ++++++++++++++++ .../tests/filecheck/non_null_rewrites.rs | 2 ++ 3 files changed, 23 insertions(+) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 0ed70fcf0..12fd01512 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -212,6 +212,11 @@ impl<'tcx> ConvertVisitor<'tcx> { assert!(matches!(hir_rw, Rewrite::Identity)); Rewrite::Text("false".into()) } + mir_op::RewriteKind::PtrNullToNone => { + // `ptr::null()` -> `None` + assert!(matches!(hir_rw, Rewrite::Identity)); + Rewrite::Text("None".into()) + } mir_op::RewriteKind::MemcpySafe { elem_size, diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 1b117d6c2..8617bb8af 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -67,6 +67,8 @@ pub enum RewriteKind { /// Replace `ptr.is_null()` with the constant `false`. We use this in cases where the rewritten /// type of `ptr` is non-optional because we inferred `ptr` to be non-nullable. IsNullToConstFalse, + /// Replace `ptr::null()` or `ptr::null_mut()` with `None`. + PtrNullToNone, /// Replace a call to `memcpy(dest, src, n)` with a safe copy operation that works on slices /// instead of raw pointers. `elem_size` is the size of the original, unrewritten pointee @@ -502,6 +504,20 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { }); } + Callee::Null { .. } => { + self.enter_rvalue(|v| { + if !v.flags[pl_ty.label].contains(FlagSet::FIXED) { + let arg_non_null = + v.perms[pl_ty.label].contains(PermissionSet::NON_NULL); + if arg_non_null { + panic!("impossible: result of null() is a NON_NULL pointer?"); + } else { + v.emit(RewriteKind::PtrNullToNone); + } + } + }); + } + _ => {} } } diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index 6351ddc07..589b8c27a 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -49,8 +49,10 @@ unsafe fn call_use_slice(cond: bool, q: *const i32) -> i32 { // No-op } let p = if cond { + // CHECK: Some((q)) q } else { + // CHECK: None ptr::null_mut() }; use_slice(p) From 2c8a5bef220dd4672619d8e42f710c210352f716 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 6 May 2024 16:16:24 -0700 Subject: [PATCH 14/30] analyze: refactor: generalize rewrite::ty::mk_cell to build any ADT --- c2rust-analyze/src/rewrite/ty.rs | 53 +++++++++++++++++--------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/c2rust-analyze/src/rewrite/ty.rs b/c2rust-analyze/src/rewrite/ty.rs index d529ac134..ab39c35c2 100644 --- a/c2rust-analyze/src/rewrite/ty.rs +++ b/c2rust-analyze/src/rewrite/ty.rs @@ -333,37 +333,42 @@ impl Convert for mir::Mutability { } } -fn mk_cell<'tcx>(tcx: TyCtxt<'tcx>, ty: ty::Ty<'tcx>) -> ty::Ty<'tcx> { - let core_crate = tcx +fn mk_adt_with_arg<'tcx>(tcx: TyCtxt<'tcx>, path: &str, arg_ty: ty::Ty<'tcx>) -> ty::Ty<'tcx> { + let mut path_parts_iter = path.split("::"); + let crate_name = path_parts_iter + .next() + .unwrap_or_else(|| panic!("couldn't find crate name in {path:?}")); + + let krate = tcx .crates(()) .iter() .cloned() .find(|&krate| tcx.crate_name(krate).as_str() == "core") - .expect("failed to find crate `core`"); + .unwrap_or_else(|| panic!("couldn't find crate {crate_name:?} for {path:?}")); - let cell_mod_child = tcx - .module_children(core_crate.as_def_id()) - .iter() - .find(|child| child.ident.as_str() == "cell") - .expect("failed to find module `core::cell`"); - let cell_mod_did = match cell_mod_child.res { - Res::Def(DefKind::Mod, did) => did, - ref r => panic!("unexpected resolution {:?} for `core::cell`", r), - }; + let mut cur_did = krate.as_def_id(); + for part in path_parts_iter { + let mod_child = tcx + .module_children(cur_did) + .iter() + .find(|child| child.ident.as_str() == part) + .unwrap_or_else(|| panic!("failed to find {part:?} for {path:?}")); + cur_did = match mod_child.res { + Res::Def(DefKind::Mod, did) => did, + Res::Def(DefKind::Struct, did) => did, + Res::Def(DefKind::Enum, did) => did, + Res::Def(DefKind::Union, did) => did, + ref r => panic!("unexpected resolution {r:?} for {part:?} in {path:?}"), + }; + } - let cell_struct_child = tcx - .module_children(cell_mod_did) - .iter() - .find(|child| child.ident.as_str() == "Cell") - .expect("failed to find struct `core::cell::Cell`"); - let cell_struct_did = match cell_struct_child.res { - Res::Def(DefKind::Struct, did) => did, - ref r => panic!("unexpected resolution {:?} for `core::cell::Cell`", r), - }; + let adt = tcx.adt_def(cur_did); + let substs = tcx.mk_substs([GenericArg::from(arg_ty)].into_iter()); + tcx.mk_adt(adt, substs) +} - let cell_adt = tcx.adt_def(cell_struct_did); - let substs = tcx.mk_substs([GenericArg::from(ty)].into_iter()); - tcx.mk_adt(cell_adt, substs) +fn mk_cell<'tcx>(tcx: TyCtxt<'tcx>, ty: ty::Ty<'tcx>) -> ty::Ty<'tcx> { + mk_adt_with_arg(tcx, "core::cell::Cell", ty) } /// Produce a `Ty` reflecting the rewrites indicated by the labels in `rw_lty`. From b79e80048c97c075d2feb9bea8c6d367f7e5d403 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 6 May 2024 16:22:45 -0700 Subject: [PATCH 15/30] analyze: refactor: use PtrDesc instead of (Ownership, Quantity) in rewrite::ty --- c2rust-analyze/src/rewrite/ty.rs | 35 ++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/c2rust-analyze/src/rewrite/ty.rs b/c2rust-analyze/src/rewrite/ty.rs index ab39c35c2..9037dd705 100644 --- a/c2rust-analyze/src/rewrite/ty.rs +++ b/c2rust-analyze/src/rewrite/ty.rs @@ -17,7 +17,7 @@ use crate::labeled_ty::{LabeledTy, LabeledTyCtxt}; use crate::pointee_type::PointeeTypes; use crate::pointer_id::{GlobalPointerTable, PointerId, PointerTable}; use crate::rewrite::Rewrite; -use crate::type_desc::{self, Ownership, Quantity, TypeDesc}; +use crate::type_desc::{self, Ownership, PtrDesc, Quantity, TypeDesc}; use hir::{ FnRetTy, GenericParamKind, Generics, ItemKind, Path, PathSegment, VariantData, WherePredicate, }; @@ -41,7 +41,7 @@ use super::LifetimeName; #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] struct RewriteLabel<'tcx> { /// Rewrite a raw pointer, whose ownership and quantity have been inferred as indicated. - ty_desc: Option<(Ownership, Quantity)>, + ty_desc: Option, /// Rewrite the pointee type of this pointer. pointee_ty: Option>, /// If set, a child or other descendant of this type requires rewriting. @@ -107,7 +107,7 @@ fn create_rewrite_label<'tcx>( // can be `None` (no rewriting required). This might let us avoid inlining a type // alias for some pointers where no actual improvement was possible. let desc = type_desc::perms_to_desc(pointer_lty.ty, perms, flags); - Some((desc.own, desc.qty)) + Some(desc.into()) } }; @@ -378,11 +378,11 @@ fn mk_rewritten_ty<'tcx>( ) -> ty::Ty<'tcx> { let tcx = *lcx; lcx.rewrite_unlabeled(rw_lty, &mut |ptr_ty, args, label| { - let (ty, own, qty) = match (label.pointee_ty, label.ty_desc) { - (Some(pointee_ty), Some((own, qty))) => { + let (ty, ptr_desc) = match (label.pointee_ty, label.ty_desc) { + (Some(pointee_ty), Some(ptr_desc)) => { // The `ty` should be a pointer. assert_eq!(args.len(), 1); - (pointee_ty, own, qty) + (pointee_ty, ptr_desc) } (Some(pointee_ty), None) => { // Just change the pointee type and nothing else. @@ -396,10 +396,10 @@ fn mk_rewritten_ty<'tcx>( }; return new_ty; } - (None, Some((own, qty))) => { + (None, Some(ptr_desc)) => { // The `ty` should be a pointer; the sole argument is the pointee type. assert_eq!(args.len(), 1); - (args[0], own, qty) + (args[0], ptr_desc) } (None, None) => { // No rewrite to apply. @@ -407,17 +407,21 @@ fn mk_rewritten_ty<'tcx>( } }; - desc_parts_to_ty(tcx, own, qty, ty) + desc_parts_to_ty(tcx, ptr_desc, ty) }) } pub fn desc_parts_to_ty<'tcx>( tcx: TyCtxt<'tcx>, - own: Ownership, - qty: Quantity, + ptr_desc: PtrDesc, pointee_ty: Ty<'tcx>, ) -> Ty<'tcx> { let mut ty = pointee_ty; + let PtrDesc { + own, + qty, + option: _, + } = ptr_desc; if own == Ownership::Cell { ty = mk_cell(tcx, ty); @@ -445,7 +449,7 @@ pub fn desc_parts_to_ty<'tcx>( } pub fn desc_to_ty<'tcx>(tcx: TyCtxt<'tcx>, desc: TypeDesc<'tcx>) -> Ty<'tcx> { - desc_parts_to_ty(tcx, desc.own, desc.qty, desc.pointee_ty) + desc_parts_to_ty(tcx, PtrDesc::from(desc), desc.pointee_ty) } struct HirTyVisitor<'a, 'tcx> { @@ -528,8 +532,13 @@ fn rewrite_ty<'tcx>( Rewrite::Sub(0, hir_args[0].span) }; - if let Some((own, qty)) = rw_lty.label.ty_desc { + if let Some(ptr_desc) = rw_lty.label.ty_desc { assert_eq!(hir_args.len(), 1); + let PtrDesc { + own, + qty, + option: _, + } = ptr_desc; if own == Ownership::Cell { rw = Rewrite::TyCtor("core::cell::Cell".into(), vec![rw]); From 5450c6a01cece898d2a5b6c1876601b58a31d381 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 6 May 2024 16:50:53 -0700 Subject: [PATCH 16/30] analyze: rewrite::ty: implement Option rewriting --- c2rust-analyze/src/rewrite/ty.rs | 24 +++++++++++-------- c2rust-analyze/tests/filecheck/cast.rs | 20 ++++++++-------- .../tests/filecheck/non_null_rewrites.rs | 5 ++++ 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/c2rust-analyze/src/rewrite/ty.rs b/c2rust-analyze/src/rewrite/ty.rs index 9037dd705..fac688594 100644 --- a/c2rust-analyze/src/rewrite/ty.rs +++ b/c2rust-analyze/src/rewrite/ty.rs @@ -371,6 +371,10 @@ fn mk_cell<'tcx>(tcx: TyCtxt<'tcx>, ty: ty::Ty<'tcx>) -> ty::Ty<'tcx> { mk_adt_with_arg(tcx, "core::cell::Cell", ty) } +fn mk_option<'tcx>(tcx: TyCtxt<'tcx>, ty: ty::Ty<'tcx>) -> ty::Ty<'tcx> { + mk_adt_with_arg(tcx, "core::option::Option", ty) +} + /// Produce a `Ty` reflecting the rewrites indicated by the labels in `rw_lty`. fn mk_rewritten_ty<'tcx>( lcx: LabeledTyCtxt<'tcx, RewriteLabel<'tcx>>, @@ -417,11 +421,7 @@ pub fn desc_parts_to_ty<'tcx>( pointee_ty: Ty<'tcx>, ) -> Ty<'tcx> { let mut ty = pointee_ty; - let PtrDesc { - own, - qty, - option: _, - } = ptr_desc; + let PtrDesc { own, qty, option } = ptr_desc; if own == Ownership::Cell { ty = mk_cell(tcx, ty); @@ -445,6 +445,10 @@ pub fn desc_parts_to_ty<'tcx>( Ownership::Box => tcx.mk_box(ty), }; + if option { + ty = mk_option(tcx, ty); + } + ty } @@ -534,11 +538,7 @@ fn rewrite_ty<'tcx>( if let Some(ptr_desc) = rw_lty.label.ty_desc { assert_eq!(hir_args.len(), 1); - let PtrDesc { - own, - qty, - option: _, - } = ptr_desc; + let PtrDesc { own, qty, option } = ptr_desc; if own == Ownership::Cell { rw = Rewrite::TyCtor("core::cell::Cell".into(), vec![rw]); @@ -562,6 +562,10 @@ fn rewrite_ty<'tcx>( Ownership::Rc => todo!(), Ownership::Box => todo!(), }; + + if option { + rw = Rewrite::TyCtor("core::option::Option".into(), vec![rw]); + } } else { rw = match *rw_lty.ty.kind() { TyKind::Ref(_rg, _ty, mutbl) => Rewrite::TyRef(lifetime_type, Box::new(rw), mutbl), diff --git a/c2rust-analyze/tests/filecheck/cast.rs b/c2rust-analyze/tests/filecheck/cast.rs index 810fd198f..523fbe58d 100644 --- a/c2rust-analyze/tests/filecheck/cast.rs +++ b/c2rust-analyze/tests/filecheck/cast.rs @@ -1,10 +1,4 @@ -// CHECK-DAG: struct S<'h0> { -struct S { - // CHECK-DAG: i: &'h0 i32 - i: *const i32, -} - -// The below ensures the concrete origins for `s` and `s.i` are the same and are hypothetical +// These lines ensure the concrete origins for `s` and `s.i` are the same and are hypothetical // CHECK-DAG: assign {{.*}}#*mut S{{.*}}origin_params: [('h0, Origin([[HYPO_ORIGIN:[0-9]+]]))]{{.*}} = Label{{.*}}origin_params: [('h0, Origin({{.*}}))] // CHECK-DAG: assign Label { origin: Some(Origin([[HYPO_ORIGIN]])){{.*}}*const i32{{.*}} = Label @@ -12,11 +6,17 @@ struct S { pub unsafe fn null_ptr() { // CHECK-DAG: ([[@LINE+3]]: s): addr_of = UNIQUE | NON_NULL, type = READ | WRITE | UNIQUE# // CHECK-LABEL: type assignment for "null_ptr": - // CHECK-DAG: ([[@LINE+1]]: s): &mut S + // CHECK-DAG: ([[@LINE+1]]: s): std::option::Option<&mut S> let s = 0 as *mut S; (*s).i = 0 as *const i32; } +// CHECK-LABEL: struct S<'h0> { +struct S { + // CHECK: i: core::option::Option<&'h0 (i32)> + i: *const i32, +} + #[repr(C)] pub struct Foo { y: *mut i32, @@ -34,9 +34,9 @@ pub unsafe fn cell_as_mut_as_cell(mut x: *mut i32, mut f: Foo) { *z = 1; *r = 1; *z = 4; - // CHECK-DAG: f.y = (x).as_ptr(); + // CHECK: f.y = (x).as_ptr(); f.y = x; - // CHECK-DAG: x = &*((f.y) as *const std::cell::Cell); + // CHECK: x = &*((f.y) as *const std::cell::Cell); x = f.y; } pub struct fdnode { diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index 589b8c27a..25de2ed30 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -27,6 +27,7 @@ unsafe fn call_use_mut(cond: bool) -> i32 { } // CHECK-LABEL: unsafe fn use_mut{{[<(]}} +// CHECK-SAME: p: core::option::Option<&{{('[^ ]* )?}}mut (i32)> unsafe fn use_mut(p: *mut i32) -> i32 { if !p.is_null() { *p = 1; @@ -37,11 +38,13 @@ unsafe fn use_mut(p: *mut i32) -> i32 { } // CHECK-LABEL: unsafe fn use_const{{[<(]}} +// CHECK-SAME: p: core::option::Option<&{{('[^ ]* )?}}(i32)> unsafe fn use_const(p: *const i32) -> i32 { *p } // CHECK-LABEL: unsafe fn call_use_slice{{[<(]}} +// CHECK-SAME: q: &{{('[^ ]* )?}}[(i32)] unsafe fn call_use_slice(cond: bool, q: *const i32) -> i32 { // `q` is not nullable, so `q.is_null()` should be rewritten to `false`. // CHECK: if !false { @@ -59,6 +62,7 @@ unsafe fn call_use_slice(cond: bool, q: *const i32) -> i32 { } // CHECK-LABEL: unsafe fn use_slice{{[<(]}} +// CHECK-SAME: p: core::option::Option<&{{('[^ ]* )?}}[(i32)]> unsafe fn use_slice(p: *const i32) -> i32 { // `p`'s new type is `Option<&[i32]>`, so `is_null()` should become `is_none()`. // CHECK: .is_none() @@ -70,6 +74,7 @@ unsafe fn use_slice(p: *const i32) -> i32 { } // CHECK-LABEL: unsafe fn use_single{{[<(]}} +// CHECK-SAME: p: core::option::Option<&{{('[^ ]* )?}}(i32)> unsafe fn use_single(p: *const i32) -> i32 { *p } From 2ee074fd4d8cc1f79e0befcceb73e15c19f93d48 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Tue, 14 May 2024 13:10:32 -0700 Subject: [PATCH 17/30] analyze: unlower: ignore StatementKind::AscribeUserType --- c2rust-analyze/src/rewrite/expr/unlower.rs | 36 ++++++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/unlower.rs b/c2rust-analyze/src/rewrite/expr/unlower.rs index 20cc29370..9ee18ddd3 100644 --- a/c2rust-analyze/src/rewrite/expr/unlower.rs +++ b/c2rust-analyze/src/rewrite/expr/unlower.rs @@ -712,11 +712,32 @@ fn is_temp_var(mir: &Body, pl: mir::PlaceRef) -> bool { pl.projection.len() == 0 && mir.local_kind(pl.local) == mir::LocalKind::Temp } +/// Indicate whether a given MIR statement should be considered when building the unlowering map. +fn filter_stmt(stmt: &mir::Statement) -> bool { + match stmt.kind { + // Ignore `AscribeUserType` annotations. These appear in the middle of some expressions. + // It's easier to ignore them all at this level rather than try to handle them in all the + // places they might appear. + mir::StatementKind::AscribeUserType(..) => false, + _ => true, + } +} + +/// Indicate whether a given MIR terminator should be considered when building the unlowering map. +fn filter_term(term: &mir::Terminator) -> bool { + match term.kind { + _ => true, + } +} + fn build_span_index(mir: &Body<'_>) -> SpanIndex { eprintln!("building span index for {:?}:", mir.source); let mut span_index_items = Vec::new(); for (bb, bb_data) in mir.basic_blocks().iter_enumerated() { for (i, stmt) in bb_data.statements.iter().enumerate() { + if !filter_stmt(stmt) { + continue; + } let loc = Location { block: bb, statement_index: i, @@ -725,12 +746,15 @@ fn build_span_index(mir: &Body<'_>) -> SpanIndex { span_index_items.push((stmt.source_info.span, loc)); } - let loc = Location { - block: bb, - statement_index: bb_data.statements.len(), - }; - eprintln!(" {:?}: {:?}", loc, bb_data.terminator().source_info.span); - span_index_items.push((bb_data.terminator().source_info.span, loc)); + let term = bb_data.terminator(); + if filter_term(term) { + let loc = Location { + block: bb, + statement_index: bb_data.statements.len(), + }; + eprintln!(" {:?}: {:?}", loc, term.source_info.span); + span_index_items.push((term.source_info.span, loc)); + } } SpanIndex::new(span_index_items) From b62443ea12d4519ed1758e68f6b1590e7764190c Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Tue, 14 May 2024 13:16:31 -0700 Subject: [PATCH 18/30] analyze: rewrite `0 as *const T` to `None` --- c2rust-analyze/src/dataflow/type_check.rs | 4 ++-- c2rust-analyze/src/rewrite/expr/convert.rs | 5 +++++ c2rust-analyze/src/rewrite/expr/mir_op.rs | 17 +++++++++++++++-- c2rust-analyze/src/util.rs | 4 ++++ .../tests/filecheck/non_null_rewrites.rs | 11 ++++++++++- 5 files changed, 36 insertions(+), 5 deletions(-) diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs index 4bd7e3707..a27fa0022 100644 --- a/c2rust-analyze/src/dataflow/type_check.rs +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -5,7 +5,7 @@ use crate::pointee_type::PointeeTypes; use crate::pointer_id::PointerTable; use crate::recent_writes::RecentWrites; use crate::util::{ - describe_rvalue, is_null_const, is_transmutable_ptr_cast, ty_callee, Callee, RvalueDesc, + self, describe_rvalue, is_transmutable_ptr_cast, ty_callee, Callee, RvalueDesc, UnknownDefCallee, }; use assert_matches::assert_matches; @@ -120,7 +120,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { CastKind::PointerFromExposedAddress => { // We support only one case here, which is the case of null pointers // constructed via casts such as `0 as *const T` - if !op.constant().copied().map(is_null_const).unwrap_or(false) { + if !util::is_null_const_operand(op) { panic!("Creating non-null pointers from exposed addresses not supported"); } // The target type of the cast must not have `NON_NULL` permission. diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 12fd01512..64d1c9953 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -217,6 +217,11 @@ impl<'tcx> ConvertVisitor<'tcx> { assert!(matches!(hir_rw, Rewrite::Identity)); Rewrite::Text("None".into()) } + mir_op::RewriteKind::ZeroAsPtrToNone => { + // `0 as *const T` -> `None` + assert!(matches!(hir_rw, Rewrite::Identity)); + Rewrite::Text("None".into()) + } mir_op::RewriteKind::MemcpySafe { elem_size, diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 8617bb8af..6dbc46ead 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -12,7 +12,7 @@ use crate::panic_detail; use crate::pointee_type::PointeeTypes; use crate::pointer_id::{PointerId, PointerTable}; use crate::type_desc::{self, Ownership, Quantity, TypeDesc}; -use crate::util::{ty_callee, Callee}; +use crate::util::{self, ty_callee, Callee}; use rustc_ast::Mutability; use rustc_middle::mir::{ BasicBlock, Body, Location, Operand, Place, Rvalue, Statement, StatementKind, Terminator, @@ -69,6 +69,8 @@ pub enum RewriteKind { IsNullToConstFalse, /// Replace `ptr::null()` or `ptr::null_mut()` with `None`. PtrNullToNone, + /// Replace `0 as *const T` or `0 as *mut T` with `None`. + ZeroAsPtrToNone, /// Replace a call to `memcpy(dest, src, n)` with a safe copy operation that works on slices /// instead of raw pointers. `elem_size` is the size of the original, unrewritten pointee @@ -569,7 +571,18 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { Rvalue::Len(pl) => { self.enter_rvalue_place(0, |v| v.visit_place(pl)); } - Rvalue::Cast(_kind, ref op, _ty) => { + Rvalue::Cast(_kind, ref op, ty) => { + if util::is_null_const_operand(op) && ty.is_unsafe_ptr() { + // Special case: convert `0 as *const T` to `None`. + if let Some(rv_lty) = expect_ty { + if !self.perms[rv_lty.label].contains(PermissionSet::NON_NULL) + && !self.flags[rv_lty.label].contains(FlagSet::FIXED) + { + self.emit(RewriteKind::ZeroAsPtrToNone); + } + } + } + self.enter_rvalue_operand(0, |v| v.visit_operand(op, None)); if let Some(rv_lty) = expect_ty { let op_lty = self.acx.type_of(op); diff --git a/c2rust-analyze/src/util.rs b/c2rust-analyze/src/util.rs index 9c6902ecf..037f3a46d 100644 --- a/c2rust-analyze/src/util.rs +++ b/c2rust-analyze/src/util.rs @@ -430,6 +430,10 @@ pub fn is_null_const(constant: Constant) -> bool { } } +pub fn is_null_const_operand(op: &Operand) -> bool { + op.constant().copied().map_or(false, is_null_const) +} + pub trait PhantomLifetime<'a> {} impl<'a, T: ?Sized> PhantomLifetime<'a> for T {} diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index 25de2ed30..591f97871 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -4,7 +4,15 @@ use std::ptr; pub unsafe fn f(cond: bool, p: *mut i32) { let mut p = p; if cond { + // Both ways of writing null constants should be rewritten to `None`. + + // CHECK: p = None; p = ptr::null_mut(); + // CHECK: [[@LINE-1]]: ptr::null_mut(): + + // CHECK: p = None; + p = 0 as *mut _; + // CHECK: [[@LINE-1]]: 0 as *mut _: } @@ -18,9 +26,10 @@ pub unsafe fn f(cond: bool, p: *mut i32) { unsafe fn call_use_mut(cond: bool) -> i32 { let mut x = 1; let p = if cond { - + // CHECK: Some(&mut (x)) ptr::addr_of_mut!(x) } else { + // CHECK: None ptr::null_mut() }; use_mut(p) From 88ce97c873cae120416a72181807a0e715600579 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 15 May 2024 15:36:57 -0700 Subject: [PATCH 19/30] analyze: unlower: include sub-places in the unlower map --- c2rust-analyze/src/rewrite/expr/mir_op.rs | 22 +- c2rust-analyze/src/rewrite/expr/unlower.rs | 222 ++++++++++++++++++--- 2 files changed, 209 insertions(+), 35 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 6dbc46ead..a1b15c842 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -41,8 +41,12 @@ pub enum SubLoc { RvaluePlace(usize), /// The place referenced by an operand. `Operand::Move/Operand::Copy -> Place` OperandPlace, - /// The pointer used in the Nth innermost deref within a place. `Place -> Place` - PlacePointer(usize), + /// The pointer used in a deref projection. `Place -> Place` + PlaceDerefPointer, + /// The base of a field projection. `Place -> Place` + PlaceFieldBase, + /// The array used in an index or slice projection. `Place -> Place` + PlaceIndexArray, } #[derive(Clone, PartialEq, Eq, Debug)] @@ -218,14 +222,20 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { self.enter(SubLoc::RvaluePlace(i), f) } - #[allow(dead_code)] fn _enter_operand_place R, R>(&mut self, f: F) -> R { self.enter(SubLoc::OperandPlace, f) } - #[allow(dead_code)] - fn _enter_place_pointer R, R>(&mut self, i: usize, f: F) -> R { - self.enter(SubLoc::PlacePointer(i), f) + fn _enter_place_deref_pointer R, R>(&mut self, f: F) -> R { + self.enter(SubLoc::PlaceDerefPointer, f) + } + + fn _enter_place_field_base R, R>(&mut self, f: F) -> R { + self.enter(SubLoc::PlaceFieldBase, f) + } + + fn _enter_place_index_array R, R>(&mut self, f: F) -> R { + self.enter(SubLoc::PlaceIndexArray, f) } /// Get the pointee type of `lty`. Returns the inferred pointee type from `self.pointee_types` diff --git a/c2rust-analyze/src/rewrite/expr/unlower.rs b/c2rust-analyze/src/rewrite/expr/unlower.rs index 9ee18ddd3..558e9c8d3 100644 --- a/c2rust-analyze/src/rewrite/expr/unlower.rs +++ b/c2rust-analyze/src/rewrite/expr/unlower.rs @@ -114,15 +114,10 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { } fn operand_desc(&self, op: &Operand<'tcx>) -> MirOriginDesc { - match *op { - mir::Operand::Copy(pl) | mir::Operand::Move(pl) => { - if is_temp_var(self.mir, pl.as_ref()) { - MirOriginDesc::LoadFromTemp - } else { - MirOriginDesc::Expr - } - } - mir::Operand::Constant(..) => MirOriginDesc::Expr, + if is_temp_var_operand(self.mir, op) { + MirOriginDesc::LoadFromTemp + } else { + MirOriginDesc::Expr } } @@ -215,20 +210,16 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { match ex.kind { hir::ExprKind::Assign(pl, rv, _span) => { // For `Assign`, we expect the assignment to be the whole thing. - let (loc, _mir_pl, mir_rv) = match self.get_sole_assign(&locs) { + let (loc, mir_pl, mir_rv) = match self.get_sole_assign(&locs) { Some(x) => x, None => { warn("expected exactly one StatementKind::Assign"); return; } }; - let desc = match mir_rv { - mir::Rvalue::Use(op) => self.operand_desc(op), - _ => MirOriginDesc::Expr, - }; self.record(loc, &[], ex); - self.record(loc, &[SubLoc::Dest], pl); - self.record_desc(loc, &[SubLoc::Rvalue], rv, desc); + self.visit_expr_place(pl, loc, vec![SubLoc::Dest], mir_pl, &[]); + self.visit_expr_rvalue(rv, loc, vec![SubLoc::Rvalue], mir_rv, &[]); } hir::ExprKind::Call(_, args) | hir::ExprKind::MethodCall(_, args, _) => { @@ -275,8 +266,9 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { self.record(loc, &[SubLoc::Rvalue], ex); for (i, (arg, mir_arg)) in args.iter().zip(mir_args).enumerate() { - self.record_operand(loc, &[SubLoc::Rvalue, SubLoc::CallArg(i)], arg, mir_arg); - self.visit_expr_operand(arg, mir_arg, &[]); + let sub_loc = vec![SubLoc::Rvalue, SubLoc::CallArg(i)]; + self.record_operand(loc, &sub_loc, arg, mir_arg); + self.visit_expr_operand(arg, loc, sub_loc, mir_arg, &[]); } if extra_locs.len() > 0 { @@ -306,8 +298,7 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { } }; self.record_desc(cursor.loc, &[], ex, MirOriginDesc::StoreIntoLocal); - self.peel_adjustments(ex, &mut cursor); - self.record_desc(cursor.loc, &cursor.sub_loc, ex, MirOriginDesc::Expr); + self.walk_expr(ex, &mut cursor); self.finish_visit_expr_cursor(ex, cursor); } } @@ -436,14 +427,124 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { } } + fn visit_expr_rvalue( + &mut self, + ex: &'tcx hir::Expr<'tcx>, + loc: Location, + sub_loc: Vec, + rv: &'a mir::Rvalue<'tcx>, + extra_locs: &[Location], + ) { + let _g = panic_detail::set_current_span(ex.span); + + // TODO: We do this check early to ensure that the `LoadFromTemp` is emitted with subloc + // `[Rvalue]` rather than `[Rvalue, RvalueOperand(0)]`, since `mir_op` emits rewrites with + // just `[Rvalue]`. For `Rvalue::Use`, the rvalue and its operand are basically + // synonymous, so ideally we would accept both sublocs as well. We should probably add an + // aliasing system or some kind of cleverness in `distribute` to make both versions work, + // or else modify the definition of `SubLoc` so there's only one way to express this + // location. + if is_temp_var_rvalue(self.mir, rv) { + self.record_desc(loc, &sub_loc, ex, MirOriginDesc::LoadFromTemp); + return; + } + + let mut cursor = VisitExprCursor::new( + self.mir, + ExprMir::Rvalue(rv), + extra_locs, + loc, + sub_loc.to_owned(), + ); + self.walk_expr(ex, &mut cursor); + + self.finish_visit_expr_cursor(ex, cursor); + } + fn visit_expr_operand( &mut self, ex: &'tcx hir::Expr<'tcx>, - _rv: &'a mir::Operand<'tcx>, - _extra_locs: &[Location], + loc: Location, + sub_loc: Vec, + op: &'a mir::Operand<'tcx>, + extra_locs: &[Location], + ) { + let _g = panic_detail::set_current_span(ex.span); + + if is_temp_var_operand(self.mir, op) { + self.record_desc(loc, &sub_loc, ex, MirOriginDesc::LoadFromTemp); + return; + } + + let mut cursor = VisitExprCursor::new( + self.mir, + ExprMir::Operand(op), + extra_locs, + loc, + sub_loc.to_owned(), + ); + self.walk_expr(ex, &mut cursor); + + self.finish_visit_expr_cursor(ex, cursor); + } + + fn visit_expr_place( + &mut self, + ex: &'tcx hir::Expr<'tcx>, + loc: Location, + sub_loc: Vec, + pl: mir::Place<'tcx>, + extra_locs: &[Location], ) { let _g = panic_detail::set_current_span(ex.span); - // TODO: handle adjustments, overloaded operators, etc + + let mut cursor = VisitExprCursor::new( + self.mir, + ExprMir::Place(pl.as_ref()), + extra_locs, + loc, + sub_loc.to_owned(), + ); + self.walk_expr(ex, &mut cursor); + + self.finish_visit_expr_cursor(ex, cursor); + } + + fn walk_expr(&mut self, ex: &'tcx hir::Expr<'tcx>, cursor: &mut VisitExprCursor<'a, '_, 'tcx>) { + let mut ex = ex; + loop { + self.peel_adjustments(ex, cursor); + if cursor.is_temp_var() { + self.record_desc(cursor.loc, &cursor.sub_loc, ex, MirOriginDesc::LoadFromTemp); + break; + } else { + self.record_desc(cursor.loc, &cursor.sub_loc, ex, MirOriginDesc::Expr); + } + + match ex.kind { + hir::ExprKind::Unary(hir::UnOp::Deref, ptr_ex) => { + if let Some(()) = cursor.peel_deref() { + ex = ptr_ex; + continue; + } + } + hir::ExprKind::Field(base_ex, _field_ident) => { + if let Some(()) = cursor.peel_field() { + ex = base_ex; + continue; + } + } + hir::ExprKind::Index(arr_ex, _idx_ex) => { + if let Some(()) = cursor.peel_index() { + ex = arr_ex; + continue; + } + } + _ => {} + } + // Keep looping only in cases that we specifically recognize. + break; + } } } @@ -526,6 +627,16 @@ impl<'a, 'b, 'tcx> VisitExprCursor<'a, 'b, 'tcx> { } } + /// Check whether the current MIR is a temporary. + pub fn is_temp_var(&self) -> bool { + match self.cur { + ExprMir::Rvalue(rv) => is_temp_var_rvalue(self.mir, rv), + ExprMir::Operand(op) => is_temp_var_operand(self.mir, op), + ExprMir::Place(pl) => is_temp_var(self.mir, pl), + ExprMir::Call(_) => false, + } + } + /// If the current MIR is a temporary, and the previous `Location` is an assignment to /// that temporary, peel it off, leaving the temporary's initializer as the current /// `Rvalue`. This also adds `LoadFromTemp` and `StoreIntoLocal` entries in `self.temp_info` @@ -673,12 +784,37 @@ impl<'a, 'b, 'tcx> VisitExprCursor<'a, 'b, 'tcx> { if let Some((&outer_proj, remaining_proj)) = pl.projection.split_last() { if matches!(outer_proj, mir::PlaceElem::Deref) { pl.projection = remaining_proj; - // Number of derefs, not counting the outermost one we just peeled off. - let num_inner_derefs = remaining_proj - .iter() - .filter(|p| matches!(p, mir::PlaceElem::Deref)) - .count(); - self.sub_loc.push(SubLoc::PlacePointer(num_inner_derefs)); + self.sub_loc.push(SubLoc::PlaceDerefPointer); + return Some(()); + } + } + self.peel_temp()?; + } + } + + /// If the current MIR is a `Place` ending with a `Field` projection, peel off the `Field`. + pub fn peel_field(&mut self) -> Option<()> { + loop { + let pl = self.require_place_mut()?; + if let Some((&outer_proj, remaining_proj)) = pl.projection.split_last() { + if matches!(outer_proj, mir::PlaceElem::Field(_idx, _ty)) { + pl.projection = remaining_proj; + self.sub_loc.push(SubLoc::PlaceFieldBase); + return Some(()); + } + } + self.peel_temp()?; + } + } + + /// If the current MIR is a `Place` ending with an `Index` projection, peel off the `Index`. + pub fn peel_index(&mut self) -> Option<()> { + loop { + let pl = self.require_place_mut()?; + if let Some((&outer_proj, remaining_proj)) = pl.projection.split_last() { + if matches!(outer_proj, mir::PlaceElem::Index(_)) { + pl.projection = remaining_proj; + self.sub_loc.push(SubLoc::PlaceIndexArray); return Some(()); } } @@ -712,6 +848,34 @@ fn is_temp_var(mir: &Body, pl: mir::PlaceRef) -> bool { pl.projection.len() == 0 && mir.local_kind(pl.local) == mir::LocalKind::Temp } +fn is_temp_var_operand(mir: &Body, op: &mir::Operand) -> bool { + match get_operand_place(op) { + Some(pl) => is_temp_var(mir, pl.as_ref()), + None => false, + } +} + +fn is_temp_var_rvalue(mir: &Body, rv: &mir::Rvalue) -> bool { + match get_rvalue_operand(rv) { + Some(op) => is_temp_var_operand(mir, op), + None => false, + } +} + +fn get_rvalue_operand<'a, 'tcx>(rv: &'a mir::Rvalue<'tcx>) -> Option<&'a mir::Operand<'tcx>> { + match *rv { + mir::Rvalue::Use(ref op) => Some(op), + _ => None, + } +} + +fn get_operand_place<'tcx>(op: &mir::Operand<'tcx>) -> Option> { + match *op { + mir::Operand::Copy(pl) | mir::Operand::Move(pl) => Some(pl), + _ => None, + } +} + /// Indicate whether a given MIR statement should be considered when building the unlowering map. fn filter_stmt(stmt: &mir::Statement) -> bool { match stmt.kind { From fb6b3c16d80a561d7b22aac4b343b008483b10be Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 15 May 2024 15:38:22 -0700 Subject: [PATCH 20/30] analyze: rewrite: generate unwrap() calls on nullable ptr derefs --- c2rust-analyze/src/rewrite/expr/mir_op.rs | 61 ++++++++++++++++--- .../tests/filecheck/non_null_rewrites.rs | 2 + 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index a1b15c842..7ec2ff506 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -15,8 +15,8 @@ use crate::type_desc::{self, Ownership, Quantity, TypeDesc}; use crate::util::{self, ty_callee, Callee}; use rustc_ast::Mutability; use rustc_middle::mir::{ - BasicBlock, Body, Location, Operand, Place, Rvalue, Statement, StatementKind, Terminator, - TerminatorKind, + BasicBlock, Body, Location, Operand, Place, PlaceElem, PlaceRef, Rvalue, Statement, + StatementKind, Terminator, TerminatorKind, }; use rustc_middle::ty::print::FmtPrinter; use rustc_middle::ty::print::Print; @@ -222,19 +222,19 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { self.enter(SubLoc::RvaluePlace(i), f) } - fn _enter_operand_place R, R>(&mut self, f: F) -> R { + fn enter_operand_place R, R>(&mut self, f: F) -> R { self.enter(SubLoc::OperandPlace, f) } - fn _enter_place_deref_pointer R, R>(&mut self, f: F) -> R { + fn enter_place_deref_pointer R, R>(&mut self, f: F) -> R { self.enter(SubLoc::PlaceDerefPointer, f) } - fn _enter_place_field_base R, R>(&mut self, f: F) -> R { + fn enter_place_field_base R, R>(&mut self, f: F) -> R { self.enter(SubLoc::PlaceFieldBase, f) } - fn _enter_place_index_array R, R>(&mut self, f: F) -> R { + fn enter_place_index_array R, R>(&mut self, f: F) -> R { self.enter(SubLoc::PlaceIndexArray, f) } @@ -661,7 +661,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { fn visit_operand(&mut self, op: &Operand<'tcx>, expect_ty: Option>) { match *op { Operand::Copy(pl) | Operand::Move(pl) => { - self.visit_place(pl); + self.enter_operand_place(|v| v.visit_place(pl)); if let Some(expect_ty) = expect_ty { let ptr_lty = self.acx.type_of(pl); @@ -689,8 +689,51 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { } } - fn visit_place(&mut self, _pl: Place<'tcx>) { - // TODO: walk over `pl` to handle all derefs (casts, `*x` -> `(*x).get()`) + fn visit_place(&mut self, pl: Place<'tcx>) { + let mut ltys = Vec::with_capacity(1 + pl.projection.len()); + ltys.push(self.acx.type_of(pl.local)); + for proj in pl.projection { + let prev_lty = ltys.last().copied().unwrap(); + ltys.push(self.acx.projection_lty(prev_lty, &proj)); + } + self.visit_place_ref(pl.as_ref(), <ys); + } + + /// Generate rewrites for a `Place` represented as a `PlaceRef`. `proj_ltys` gives the `LTy` + /// for the `Local` and after each projection. + fn visit_place_ref(&mut self, pl: PlaceRef<'tcx>, proj_ltys: &[LTy<'tcx>]) { + let (&last_proj, rest) = match pl.projection.split_last() { + Some(x) => x, + None => return, + }; + + debug_assert!(pl.projection.len() >= 1); + // `LTy` of the base place, before the last projection. + let base_lty = proj_ltys[pl.projection.len() - 1]; + // `LTy` resulting from applying `last_proj` to `base_lty`. + let _proj_lty = proj_ltys[pl.projection.len()]; + + let base_pl = PlaceRef { + local: pl.local, + projection: rest, + }; + match last_proj { + PlaceElem::Deref => { + self.enter_place_deref_pointer(|v| { + v.visit_place_ref(base_pl, proj_ltys); + if !v.perms[base_lty.label].contains(PermissionSet::NON_NULL) { + v.emit(RewriteKind::OptionUnwrap); + } + }); + } + PlaceElem::Field(_idx, _ty) => { + self.enter_place_field_base(|v| v.visit_place_ref(base_pl, proj_ltys)); + } + PlaceElem::Index(_) | PlaceElem::ConstantIndex { .. } | PlaceElem::Subslice { .. } => { + self.enter_place_index_array(|v| v.visit_place_ref(base_pl, proj_ltys)); + } + PlaceElem::Downcast(_, _) => {} + } } fn visit_ptr_offset(&mut self, op: &Operand<'tcx>, result_ty: LTy<'tcx>) { diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index 591f97871..e726522b4 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -39,6 +39,7 @@ unsafe fn call_use_mut(cond: bool) -> i32 { // CHECK-SAME: p: core::option::Option<&{{('[^ ]* )?}}mut (i32)> unsafe fn use_mut(p: *mut i32) -> i32 { if !p.is_null() { + // CHECK: *(p).unwrap() = 1; *p = 1; } // CHECK: use_const @@ -49,6 +50,7 @@ unsafe fn use_mut(p: *mut i32) -> i32 { // CHECK-LABEL: unsafe fn use_const{{[<(]}} // CHECK-SAME: p: core::option::Option<&{{('[^ ]* )?}}(i32)> unsafe fn use_const(p: *const i32) -> i32 { + // CHECK: *(p).unwrap() *p } From 0451e2c72bee36d3ec914220c1fe0c9161f28e2b Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 15 May 2024 16:23:53 -0700 Subject: [PATCH 21/30] analyze: rewrite: downgrade Option before unwrapping for deref --- c2rust-analyze/src/rewrite/expr/mir_op.rs | 74 +++++++++++++------ c2rust-analyze/src/type_desc.rs | 9 +++ .../tests/filecheck/non_null_rewrites.rs | 2 +- 3 files changed, 60 insertions(+), 25 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 7ec2ff506..0eea5c76f 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -15,7 +15,7 @@ use crate::type_desc::{self, Ownership, Quantity, TypeDesc}; use crate::util::{self, ty_callee, Callee}; use rustc_ast::Mutability; use rustc_middle::mir::{ - BasicBlock, Body, Location, Operand, Place, PlaceElem, PlaceRef, Rvalue, Statement, + BasicBlock, Body, BorrowKind, Location, Operand, Place, PlaceElem, PlaceRef, Rvalue, Statement, StatementKind, Terminator, TerminatorKind, }; use rustc_middle::ty::print::FmtPrinter; @@ -339,7 +339,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { self.enter_rvalue(|v| v.visit_rvalue(rv, Some(rv_lty))); // The cast from `rv_lty` to `pl_lty` should be applied to the RHS. self.enter_rvalue(|v| v.emit_cast_lty_lty(rv_lty, pl_lty)); - self.enter_dest(|v| v.visit_place(pl)); + self.enter_dest(|v| v.visit_place(pl, true)); } StatementKind::FakeRead(..) => {} StatementKind::SetDiscriminant { .. } => todo!("statement {:?}", stmt), @@ -553,14 +553,18 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { Rvalue::Repeat(ref op, _) => { self.enter_rvalue_operand(0, |v| v.visit_operand(op, None)); } - Rvalue::Ref(_rg, _kind, pl) => { - self.enter_rvalue_place(0, |v| v.visit_place(pl)); + Rvalue::Ref(_rg, kind, pl) => { + let mutbl = match kind { + BorrowKind::Mut { .. } => true, + BorrowKind::Shared | BorrowKind::Shallow | BorrowKind::Unique => false, + }; + self.enter_rvalue_place(0, |v| v.visit_place(pl, mutbl)); } Rvalue::ThreadLocalRef(_def_id) => { // TODO } Rvalue::AddressOf(mutbl, pl) => { - self.enter_rvalue_place(0, |v| v.visit_place(pl)); + self.enter_rvalue_place(0, |v| v.visit_place(pl, mutbl == Mutability::Mut)); if let Some(expect_ty) = expect_ty { let desc = type_desc::perms_to_desc_with_pointee( self.acx.tcx(), @@ -579,7 +583,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { } } Rvalue::Len(pl) => { - self.enter_rvalue_place(0, |v| v.visit_place(pl)); + self.enter_rvalue_place(0, |v| v.visit_place(pl, false)); } Rvalue::Cast(_kind, ref op, ty) => { if util::is_null_const_operand(op) && ty.is_unsafe_ptr() { @@ -640,7 +644,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { self.enter_rvalue_operand(0, |v| v.visit_operand(op, None)); } Rvalue::Discriminant(pl) => { - self.enter_rvalue_place(0, |v| v.visit_place(pl)); + self.enter_rvalue_place(0, |v| v.visit_place(pl, false)); } Rvalue::Aggregate(ref _kind, ref ops) => { for (i, op) in ops.iter().enumerate() { @@ -651,7 +655,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { self.enter_rvalue_operand(0, |v| v.visit_operand(op, None)); } Rvalue::CopyForDeref(pl) => { - self.enter_rvalue_place(0, |v| v.visit_place(pl)); + self.enter_rvalue_place(0, |v| v.visit_place(pl, false)); } } } @@ -661,7 +665,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { fn visit_operand(&mut self, op: &Operand<'tcx>, expect_ty: Option>) { match *op { Operand::Copy(pl) | Operand::Move(pl) => { - self.enter_operand_place(|v| v.visit_place(pl)); + self.enter_operand_place(|v| v.visit_place(pl, false)); if let Some(expect_ty) = expect_ty { let ptr_lty = self.acx.type_of(pl); @@ -678,7 +682,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { fn visit_operand_desc(&mut self, op: &Operand<'tcx>, expect_desc: TypeDesc<'tcx>) { match *op { Operand::Copy(pl) | Operand::Move(pl) => { - self.visit_place(pl); + self.visit_place(pl, false); let ptr_lty = self.acx.type_of(pl); if !ptr_lty.label.is_none() { @@ -689,19 +693,25 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { } } - fn visit_place(&mut self, pl: Place<'tcx>) { + fn visit_place(&mut self, pl: Place<'tcx>, in_mutable_context: bool) { let mut ltys = Vec::with_capacity(1 + pl.projection.len()); ltys.push(self.acx.type_of(pl.local)); for proj in pl.projection { let prev_lty = ltys.last().copied().unwrap(); ltys.push(self.acx.projection_lty(prev_lty, &proj)); } - self.visit_place_ref(pl.as_ref(), <ys); + self.visit_place_ref(pl.as_ref(), <ys, in_mutable_context); } /// Generate rewrites for a `Place` represented as a `PlaceRef`. `proj_ltys` gives the `LTy` - /// for the `Local` and after each projection. - fn visit_place_ref(&mut self, pl: PlaceRef<'tcx>, proj_ltys: &[LTy<'tcx>]) { + /// for the `Local` and after each projection. `in_mutable_context` is `true` if the `Place` + /// is in a mutable context, such as the LHS of an assignment. + fn visit_place_ref( + &mut self, + pl: PlaceRef<'tcx>, + proj_ltys: &[LTy<'tcx>], + in_mutable_context: bool, + ) { let (&last_proj, rest) = match pl.projection.split_last() { Some(x) => x, None => return, @@ -720,17 +730,36 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { match last_proj { PlaceElem::Deref => { self.enter_place_deref_pointer(|v| { - v.visit_place_ref(base_pl, proj_ltys); - if !v.perms[base_lty.label].contains(PermissionSet::NON_NULL) { + v.visit_place_ref(base_pl, proj_ltys, in_mutable_context); + if !v.perms[base_lty.label].contains(PermissionSet::NON_NULL) + && !v.flags[base_lty.label].contains(FlagSet::FIXED) + { + // If the pointer type is non-copy, downgrade (borrow) before calling + // `unwrap()`. + let desc = type_desc::perms_to_desc( + base_lty.ty, + v.perms[base_lty.label], + v.flags[base_lty.label], + ); + if !desc.own.is_copy() { + v.emit(RewriteKind::OptionDowngrade { + mutbl: in_mutable_context, + deref: true, + }); + } v.emit(RewriteKind::OptionUnwrap); } }); } PlaceElem::Field(_idx, _ty) => { - self.enter_place_field_base(|v| v.visit_place_ref(base_pl, proj_ltys)); + self.enter_place_field_base(|v| { + v.visit_place_ref(base_pl, proj_ltys, in_mutable_context) + }); } PlaceElem::Index(_) | PlaceElem::ConstantIndex { .. } | PlaceElem::Subslice { .. } => { - self.enter_place_index_array(|v| v.visit_place_ref(base_pl, proj_ltys)); + self.enter_place_index_array(|v| { + v.visit_place_ref(base_pl, proj_ltys, in_mutable_context) + }); } PlaceElem::Downcast(_, _) => {} } @@ -934,11 +963,8 @@ where // moving/consuming the input. For example, if the `from` type is `Option>` and // `to` is `&mut T`, we start by calling `p.as_mut().as_deref()`, which gives // `Option<&mut T>` without consuming `p`. - match from.own { - Ownership::Raw | Ownership::RawMut | Ownership::Imm | Ownership::Cell => { - // No-op. The `from` type is `Copy`, so we can unwrap it without consequence. - } - Ownership::Mut | Ownership::Rc | Ownership::Box => match to.own { + if !from.own.is_copy() { + match to.own { Ownership::Raw | Ownership::Imm => { (self.emit)(RewriteKind::OptionDowngrade { mutbl: false, @@ -956,7 +982,7 @@ where _ => { // Remaining cases are unsupported. } - }, + } } } diff --git a/c2rust-analyze/src/type_desc.rs b/c2rust-analyze/src/type_desc.rs index 8c4a201a0..dbfc5eb2d 100644 --- a/c2rust-analyze/src/type_desc.rs +++ b/c2rust-analyze/src/type_desc.rs @@ -74,6 +74,15 @@ impl PtrDesc { } } +impl Ownership { + pub fn is_copy(&self) -> bool { + match *self { + Ownership::Raw | Ownership::RawMut | Ownership::Imm | Ownership::Cell => true, + Ownership::Mut | Ownership::Rc | Ownership::Box => false, + } + } +} + fn perms_to_ptr_desc(perms: PermissionSet, flags: FlagSet) -> PtrDesc { let own = if perms.contains(PermissionSet::UNIQUE | PermissionSet::WRITE) { Ownership::Mut diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index e726522b4..13b9b7c20 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -39,7 +39,7 @@ unsafe fn call_use_mut(cond: bool) -> i32 { // CHECK-SAME: p: core::option::Option<&{{('[^ ]* )?}}mut (i32)> unsafe fn use_mut(p: *mut i32) -> i32 { if !p.is_null() { - // CHECK: *(p).unwrap() = 1; + // CHECK: *(p).as_deref_mut().unwrap() = 1; *p = 1; } // CHECK: use_const From 87483f38efb7c1b0c3be46c2384248c8c02ba74d Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 15 May 2024 16:44:02 -0700 Subject: [PATCH 22/30] analyze: check mut-to-imm Option downgrade in non_null_rewrites test --- .../tests/filecheck/non_null_rewrites.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index 13b9b7c20..06d9e132a 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -37,7 +37,7 @@ unsafe fn call_use_mut(cond: bool) -> i32 { // CHECK-LABEL: unsafe fn use_mut{{[<(]}} // CHECK-SAME: p: core::option::Option<&{{('[^ ]* )?}}mut (i32)> -unsafe fn use_mut(p: *mut i32) -> i32 { +unsafe fn use_mut(mut p: *mut i32) -> i32 { if !p.is_null() { // CHECK: *(p).as_deref_mut().unwrap() = 1; *p = 1; @@ -89,3 +89,20 @@ unsafe fn use_slice(p: *const i32) -> i32 { unsafe fn use_single(p: *const i32) -> i32 { *p } + + +// CHECK-LABEL: unsafe fn downgrade_mut_to_imm_on_deref{{[<(]}} +// CHECK-SAME: p: core::option::Option<&{{('[^ ]* )?}}mut (i32)> +unsafe fn downgrade_mut_to_imm_on_deref(cond: bool, mut p: *mut i32) -> i32 { + if cond { + // Ensure `p` is wrapped in `Option`. + p = ptr::null_mut(); + } + // Ensure `p` is mutable. + *p = 1; + // This read needs a downgrade via `as_deref()` to avoid moving `p: Option<&mut _>`. + // CHECK: let x = *(p).as_deref().unwrap(); + let x = *p; + *p = 2; + x +} From 03c46287ae95d7c811cee329d4d93838326da7f3 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 16 May 2024 11:08:46 -0700 Subject: [PATCH 23/30] analyze: fix non_null_rewrites test to work with FileCheck-7 We previously used the syntax `[[[var]] ..]`, which should expand to `[value ..]`, but FileCheck-7 parses the first part as `[[ [var ]]` and reports a syntax error. --- c2rust-analyze/tests/filecheck/non_null_rewrites.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs index 06d9e132a..b76ba1d7d 100644 --- a/c2rust-analyze/tests/filecheck/non_null_rewrites.rs +++ b/c2rust-analyze/tests/filecheck/non_null_rewrites.rs @@ -17,7 +17,7 @@ pub unsafe fn f(cond: bool, p: *mut i32) { // CHECK: let ([[arr:.+]], [[idx:.+]], ) = ((p), (3) as usize, ); - // CHECK-NEXT: [[arr]].map(|arr| &arr[[[idx]] ..]) + // CHECK-NEXT: [[arr]].map(|arr| &arr{{\[}}[[idx]] ..]) let q = p.offset(3); } From 15bfdd61cbb92e3727e656a19fb75116c1aa3875 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 3 Jun 2024 09:45:13 -0700 Subject: [PATCH 24/30] analyze: rewrite::ty: fix crate name in mk_adt_with_arg --- c2rust-analyze/src/rewrite/ty.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/c2rust-analyze/src/rewrite/ty.rs b/c2rust-analyze/src/rewrite/ty.rs index fac688594..8b5cf1210 100644 --- a/c2rust-analyze/src/rewrite/ty.rs +++ b/c2rust-analyze/src/rewrite/ty.rs @@ -33,6 +33,7 @@ use rustc_middle::mir::{self, Body, LocalDecl}; use rustc_middle::ty::print::{FmtPrinter, Print}; use rustc_middle::ty::{self, AdtDef, GenericArg, GenericArgKind, List, ReErased, TyCtxt}; use rustc_middle::ty::{Ty, TyKind, TypeAndMut}; +use rustc_span::symbol::Symbol; use rustc_span::Span; use super::LifetimeName; @@ -338,12 +339,13 @@ fn mk_adt_with_arg<'tcx>(tcx: TyCtxt<'tcx>, path: &str, arg_ty: ty::Ty<'tcx>) -> let crate_name = path_parts_iter .next() .unwrap_or_else(|| panic!("couldn't find crate name in {path:?}")); + let crate_name = Symbol::intern(crate_name); let krate = tcx .crates(()) .iter() .cloned() - .find(|&krate| tcx.crate_name(krate).as_str() == "core") + .find(|&krate| tcx.crate_name(krate) == crate_name) .unwrap_or_else(|| panic!("couldn't find crate {crate_name:?} for {path:?}")); let mut cur_did = krate.as_def_id(); From f79caef676ebb58e2b7110b5e11e447697bf84d5 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 3 Jun 2024 09:51:24 -0700 Subject: [PATCH 25/30] analyze: rewrite::convert: fix comment in zeroize case --- c2rust-analyze/src/rewrite/expr/convert.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 64d1c9953..d1c3ff94a 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -258,7 +258,7 @@ impl<'tcx> ConvertVisitor<'tcx> { elem_size, dest_single, } => { - // `memcpy(dest, src, n)` to a `copy_from_slice` call + // `memset(dest, 0, n)` to assignments that zero out each field of `*dest` assert!(matches!(hir_rw, Rewrite::Identity)); let zeroize_body = if dest_single { Rewrite::Text(generate_zeroize_code(zero_ty, "(*dest)")) From b16282e495544b249169c092fc1adede4d50633e Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 3 Jun 2024 10:09:52 -0700 Subject: [PATCH 26/30] analyze: rewrite::mir_op: reformat assert in Callee::Null case --- c2rust-analyze/src/rewrite/expr/mir_op.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 0eea5c76f..56c8e07cc 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -519,13 +519,11 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { Callee::Null { .. } => { self.enter_rvalue(|v| { if !v.flags[pl_ty.label].contains(FlagSet::FIXED) { - let arg_non_null = - v.perms[pl_ty.label].contains(PermissionSet::NON_NULL); - if arg_non_null { - panic!("impossible: result of null() is a NON_NULL pointer?"); - } else { - v.emit(RewriteKind::PtrNullToNone); - } + assert!( + !v.perms[pl_ty.label].contains(PermissionSet::NON_NULL), + "impossible: result of null() is a NON_NULL pointer?" + ); + v.emit(RewriteKind::PtrNullToNone); } }); } From c7d0f2fe510717f1341fee5c59b1e687d541a3eb Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 3 Jun 2024 10:17:04 -0700 Subject: [PATCH 27/30] analyze: rewrite::mir_op: fix comment on OptionDowngrade case --- c2rust-analyze/src/rewrite/expr/mir_op.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 56c8e07cc..5134fca79 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -959,7 +959,7 @@ where if from.option && from.own != to.own { // Downgrade ownership before unwrapping the `Option` when possible. This can avoid // moving/consuming the input. For example, if the `from` type is `Option>` and - // `to` is `&mut T`, we start by calling `p.as_mut().as_deref()`, which gives + // `to` is `&mut T`, we start by calling `p.as_deref_mut()`, which produces // `Option<&mut T>` without consuming `p`. if !from.own.is_copy() { match to.own { From 6c3e7af8568a382c5d714bc4ef00ce80e2eb9a14 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 3 Jun 2024 10:27:31 -0700 Subject: [PATCH 28/30] analyze: rewrite::convert: clarify comment about nested OptionMapBegin/End --- c2rust-analyze/src/rewrite/expr/convert.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index d1c3ff94a..2a9991cb0 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -323,7 +323,10 @@ impl<'tcx> ConvertVisitor<'tcx> { ) -> Option<(&[DistRewrite], &[DistRewrite])> { for (i, mir_rw) in mir_rws.iter().enumerate() { match mir_rw.rw { - // Bail out if we see nested delimiters. + // Bail out if we see nested delimiters. This prevents the special + // `Option::map` rewrite from applying, so the caller will fall back on the + // `unwrap()` + `Some(_)` rewrites for `OptionMapBegin/End` that are + // implemented in `convert_cast_rewrite`. mir_op::RewriteKind::OptionMapBegin => return None, mir_op::RewriteKind::OptionMapEnd => { let (a, b) = mir_rws.split_at(i); From af3980da27f78a2f4acf0855b70c5220342b5b22 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Tue, 25 Jun 2024 11:50:40 -0700 Subject: [PATCH 29/30] analyze: replace some debug prints with log macros --- c2rust-analyze/src/rewrite/expr/mir_op.rs | 12 +++++++----- c2rust-analyze/src/rewrite/expr/unlower.rs | 6 +++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 5134fca79..f8de4b2f8 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -13,6 +13,7 @@ use crate::pointee_type::PointeeTypes; use crate::pointer_id::{PointerId, PointerTable}; use crate::type_desc::{self, Ownership, Quantity, TypeDesc}; use crate::util::{self, ty_callee, Callee}; +use log::trace; use rustc_ast::Mutability; use rustc_middle::mir::{ BasicBlock, Body, BorrowKind, Location, Operand, Place, PlaceElem, PlaceRef, Rvalue, Statement, @@ -990,17 +991,18 @@ where (self.emit)(RewriteKind::OptionUnwrap); from.option = false; } else if from.option && to.option { - eprintln!("try_build_cast_desc_desc: emit OptionMapBegin"); + trace!("try_build_cast_desc_desc: emit OptionMapBegin"); if from.own != to.own { - eprintln!(" own differs: {:?} != {:?}", from.own, to.own); + trace!(" own differs: {:?} != {:?}", from.own, to.own); } if from.qty != to.qty { - eprintln!(" qty differs: {:?} != {:?}", from.qty, to.qty); + trace!(" qty differs: {:?} != {:?}", from.qty, to.qty); } if from.pointee_ty != to.pointee_ty { - eprintln!( + trace!( " pointee_ty differs: {:?} != {:?}", - from.pointee_ty, to.pointee_ty + from.pointee_ty, + to.pointee_ty ); } (self.emit)(RewriteKind::OptionMapBegin); diff --git a/c2rust-analyze/src/rewrite/expr/unlower.rs b/c2rust-analyze/src/rewrite/expr/unlower.rs index 558e9c8d3..4905072e2 100644 --- a/c2rust-analyze/src/rewrite/expr/unlower.rs +++ b/c2rust-analyze/src/rewrite/expr/unlower.rs @@ -895,7 +895,7 @@ fn filter_term(term: &mir::Terminator) -> bool { } fn build_span_index(mir: &Body<'_>) -> SpanIndex { - eprintln!("building span index for {:?}:", mir.source); + debug!("building span index for {:?}:", mir.source); let mut span_index_items = Vec::new(); for (bb, bb_data) in mir.basic_blocks().iter_enumerated() { for (i, stmt) in bb_data.statements.iter().enumerate() { @@ -906,7 +906,7 @@ fn build_span_index(mir: &Body<'_>) -> SpanIndex { block: bb, statement_index: i, }; - eprintln!(" {:?}: {:?}", loc, stmt.source_info.span); + debug!(" {:?}: {:?}", loc, stmt.source_info.span); span_index_items.push((stmt.source_info.span, loc)); } @@ -916,7 +916,7 @@ fn build_span_index(mir: &Body<'_>) -> SpanIndex { block: bb, statement_index: bb_data.statements.len(), }; - eprintln!(" {:?}: {:?}", loc, term.source_info.span); + debug!(" {:?}: {:?}", loc, term.source_info.span); span_index_items.push((term.source_info.span, loc)); } } From 73652be982092d696ab519ae55f9dc239dca7fb4 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Tue, 25 Jun 2024 12:09:59 -0700 Subject: [PATCH 30/30] analyze: rewrite: clarify option downgrade no-op cases --- c2rust-analyze/src/rewrite/expr/mir_op.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index f8de4b2f8..a490b80de 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -13,7 +13,7 @@ use crate::pointee_type::PointeeTypes; use crate::pointer_id::{PointerId, PointerTable}; use crate::type_desc::{self, Ownership, Quantity, TypeDesc}; use crate::util::{self, ty_callee, Callee}; -use log::trace; +use log::{error, trace}; use rustc_ast::Mutability; use rustc_middle::mir::{ BasicBlock, Body, BorrowKind, Location, Operand, Place, PlaceElem, PlaceRef, Rvalue, Statement, @@ -963,6 +963,8 @@ where // `to` is `&mut T`, we start by calling `p.as_deref_mut()`, which produces // `Option<&mut T>` without consuming `p`. if !from.own.is_copy() { + // Note that all non-`Copy` ownership types are also safe. We don't reach this + // code when `from.own` is `Raw` or `RawMut`. match to.own { Ownership::Raw | Ownership::Imm => { (self.emit)(RewriteKind::OptionDowngrade { @@ -978,8 +980,17 @@ where }); from.own = Ownership::Mut; } + Ownership::Rc if from.own == Ownership::Rc => { + // `p.clone()` allows using an `Option>` without consuming the + // original. However, `RewriteKind::Clone` is not yet implemented. + error!("Option -> Option clone rewrite NYI"); + } _ => { - // Remaining cases are unsupported. + // Remaining cases don't have a valid downgrade operation. We leave them + // as is, and the `unwrap`/`map` operations below will consume the original + // value. Some cases are also impossible to implement, like casting from + // `Rc` to `Box`, which will be caught when attempting the `qty`/`own` + // casts below. } } }