Skip to content

Commit

Permalink
feat(replays): Key-based scrubbing (#2034)
Browse files Browse the repository at this point in the history
Instead of scrubbing all string values in type 5 replay payloads and
pretending that their key is `""`, make the scrubber aware of the
current path in the payload, and restrict scrubbing to a predefined set
of fields (as we do for error events with `pii=true`).

By passing the current path into the PII processor, default key-based
rules such as the
[`PASSWORD_KEY_REGEX`](https://github.com/getsentry/relay/blob/3a81590c33b196c32c0c546ef93c7db9b1614ddd/relay-general/src/pii/regexes.rs#L275-L279)
are now enabled as well.

In order to use the existing PII matchers, I had to modify
`ProcessingState` to allow taking ownership of its parent.

Fixes #2007.

---------

Co-authored-by: Oleksandr <1931331+olksdr@users.noreply.github.com>
  • Loading branch information
jjbayer and olksdr committed Apr 20, 2023
1 parent 9fe3bea commit 3ec78a1
Show file tree
Hide file tree
Showing 7 changed files with 631 additions and 123 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Unreleased

**Features**:

- Scrub sensitive keys (`passwd`, `token`, ...) in Replay recording data. ([#2034](https://github.com/getsentry/relay/pull/2034))

**Internal**:

- Include unknown feature flags in project config when serializing it. ([#2040](https://github.com/getsentry/relay/pull/2040))
Expand Down
96 changes: 79 additions & 17 deletions relay-general/src/processor/attrs.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::borrow::Cow;
use std::fmt;
use std::ops::RangeInclusive;
use std::ops::{Deref, RangeInclusive};

use enumset::{EnumSet, EnumSetType};
use smallvec::SmallVec;
Expand Down Expand Up @@ -328,47 +328,73 @@ impl Default for FieldAttrs {
#[derive(Debug, Clone, Eq, Ord, PartialOrd)]
enum PathItem<'a> {
StaticKey(&'a str),
OwnedKey(String),
Index(usize),
}

impl<'a> PartialEq for PathItem<'a> {
fn eq(&self, other: &PathItem<'a>) -> bool {
match *self {
PathItem::StaticKey(s) => other.key() == Some(s),
PathItem::Index(value) => other.index() == Some(value),
}
self.key() == other.key() && self.index() == other.index()
}
}

impl<'a> PathItem<'a> {
/// Returns the key if there is one
#[inline]
pub fn key(&self) -> Option<&str> {
match *self {
match self {
PathItem::StaticKey(s) => Some(s),
PathItem::OwnedKey(s) => Some(s.as_str()),
PathItem::Index(_) => None,
}
}

/// Returns the index if there is one
#[inline]
pub fn index(&self) -> Option<usize> {
match *self {
PathItem::StaticKey(_) => None,
PathItem::Index(idx) => Some(idx),
match self {
PathItem::StaticKey(_) | PathItem::OwnedKey(_) => None,
PathItem::Index(idx) => Some(*idx),
}
}
}

impl<'a> fmt::Display for PathItem<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
match self {
PathItem::StaticKey(s) => f.pad(s),
PathItem::OwnedKey(s) => f.pad(s.as_str()),
PathItem::Index(val) => write!(f, "{val}"),
}
}
}

/// Like [`std::borrow::Cow`], but with a boxed value.
///
/// This is useful for types that contain themselves, where otherwise the layout of the type
/// cannot be computed, for example
///
/// ```rust,ignore
/// struct Foo<'a>(Cow<'a, Foo<'a>>); // will not compile
/// struct Bar<'a>(BoxCow<'a, Bar<'a>>); // will compile
/// ```
#[derive(Debug, Clone)]
enum BoxCow<'a, T> {
Borrowed(&'a T),
Owned(Box<T>),
}

impl<T> Deref for BoxCow<'_, T> {
type Target = T;

fn deref(&self) -> &Self::Target {
match self {
BoxCow::Borrowed(inner) => inner,
BoxCow::Owned(inner) => inner.deref(),
}
}
}

/// An event's processing state.
///
/// The processing state describes an item in an event which is being processed, an example
Expand All @@ -384,7 +410,12 @@ impl<'a> fmt::Display for PathItem<'a> {
/// the path items in the processing state and check whether a selector matches.
#[derive(Debug, Clone)]
pub struct ProcessingState<'a> {
parent: Option<&'a ProcessingState<'a>>,
// In event scrubbing, every state holds a reference to its parent.
// In Replay scrubbing, we do not call `process_*` recursively,
// but instead hold a single `ProcessingState` that represents the current item.
// This item owns its parent (plus ancestors) exclusively, which is why we use `BoxCow` here
// rather than `Rc` / `Arc`.
parent: Option<BoxCow<'a, ProcessingState<'a>>>,
path_item: Option<PathItem<'a>>,
attrs: Option<Cow<'a, FieldAttrs>>,
value_type: EnumSet<ValueType>,
Expand Down Expand Up @@ -427,7 +458,7 @@ impl<'a> ProcessingState<'a> {
value_type: impl IntoIterator<Item = ValueType>,
) -> Self {
ProcessingState {
parent: Some(self),
parent: Some(BoxCow::Borrowed(self)),
path_item: Some(PathItem::StaticKey(key)),
attrs,
value_type: value_type.into_iter().collect(),
Expand All @@ -443,14 +474,33 @@ impl<'a> ProcessingState<'a> {
value_type: impl IntoIterator<Item = ValueType>,
) -> Self {
ProcessingState {
parent: Some(self),
parent: Some(BoxCow::Borrowed(self)),
path_item: Some(PathItem::StaticKey(key)),
attrs,
value_type: value_type.into_iter().collect(),
depth: self.depth + 1,
}
}

/// Derives a processing state by entering an owned key.
///
/// The new (child) state takes ownership of the current (parent) state.
pub fn enter_owned(
self,
key: String,
attrs: Option<Cow<'a, FieldAttrs>>,
value_type: impl IntoIterator<Item = ValueType>,
) -> Self {
let depth = self.depth + 1;
ProcessingState {
parent: Some(BoxCow::Owned(self.into())),
path_item: Some(PathItem::OwnedKey(key)),
attrs,
value_type: value_type.into_iter().collect(),
depth,
}
}

/// Derives a processing state by entering an index.
pub fn enter_index(
&'a self,
Expand All @@ -459,7 +509,7 @@ impl<'a> ProcessingState<'a> {
value_type: impl IntoIterator<Item = ValueType>,
) -> Self {
ProcessingState {
parent: Some(self),
parent: Some(BoxCow::Borrowed(self)),
path_item: Some(PathItem::Index(idx)),
attrs,
value_type: value_type.into_iter().collect(),
Expand All @@ -472,7 +522,7 @@ impl<'a> ProcessingState<'a> {
ProcessingState {
attrs,
path_item: None,
parent: Some(self),
parent: Some(BoxCow::Borrowed(self)),
..self.clone()
}
}
Expand Down Expand Up @@ -514,6 +564,18 @@ impl<'a> ProcessingState<'a> {
}
}

/// Returns the contained parent state.
///
/// - Returns `Ok(None)` if the current state is the root.
/// - Returns `Err(self)` if the current state does not own the parent state.
pub fn try_into_parent(self) -> Result<Option<Self>, Self> {
match self.parent {
Some(BoxCow::Borrowed(_)) => Err(self),
Some(BoxCow::Owned(parent)) => Ok(Some(*parent)),
None => Ok(None),
}
}

/// Return the depth (~ indentation level) of the currently processed value.
pub fn depth(&'a self) -> usize {
self.depth
Expand All @@ -523,7 +585,7 @@ impl<'a> ProcessingState<'a> {
///
/// This is `false` when we entered a newtype struct.
pub fn entered_anything(&'a self) -> bool {
if let Some(parent) = self.parent {
if let Some(parent) = &self.parent {
parent.depth() != self.depth()
} else {
true
Expand Down Expand Up @@ -553,7 +615,7 @@ impl<'a> Iterator for ProcessingStateIter<'a> {

fn next(&mut self) -> Option<Self::Item> {
let current = self.state?;
self.state = current.parent;
self.state = current.parent.as_deref();
Some(current)
}

Expand Down
Loading

0 comments on commit 3ec78a1

Please sign in to comment.