Skip to content

Commit

Permalink
feat(general): Scrub all fields with IP address (#1725)
Browse files Browse the repository at this point in the history
This change will scrub all fields containing an IP address, rather than
only scrubbing certain fields known to have them

referencing issue #1693

Co-authored-by: Oleksandr <1931331+olksdr@users.noreply.github.com>
Co-authored-by: Joris Bayer <joris.bayer@sentry.io>
  • Loading branch information
3 people committed Jan 12, 2023
1 parent 588e46a commit 156c2ae
Show file tree
Hide file tree
Showing 8 changed files with 227 additions and 43 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
- Add max replay size configuration parameter. ([#1694](https://github.com/getsentry/relay/pull/1694))
- Add nonchunked replay recording message type. ([#1653](https://github.com/getsentry/relay/pull/1653))
- Add `abnormal_mechanism` field to SessionUpdate protocol. ([#1665](https://github.com/getsentry/relay/pull/1665))
- Scrub all fields with IP addresses rather than only known IP address fields. ([#1725](https://github.com/getsentry/relay/pull/1725))

**Bug Fixes**:

Expand Down
72 changes: 41 additions & 31 deletions relay-general/src/pii/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,6 @@ use crate::pii::{
};
use crate::processor::{SelectorPathItem, SelectorSpec, ValueType};

// XXX: Move to @ip rule for better IP address scrubbing. Right now we just try to keep
// compatibility with Python.
static KNOWN_IP_FIELDS: Lazy<SelectorSpec> = Lazy::new(|| {
"($request.env.REMOTE_ADDR | $user.ip_address | $sdk.client_ip)"
.parse()
.unwrap()
});

/// Fields that the legacy data scrubber cannot strip.
///
/// We define this list independently of `metastructure(pii = true/false)` because the new PII
Expand All @@ -27,6 +19,13 @@ static DATASCRUBBER_IGNORE: Lazy<SelectorSpec> = Lazy::new(|| {
.unwrap()
});

/// Fields that are known to contain IPs. Used for legacy IP scrubbing.
static KNOWN_IP_FIELDS: Lazy<SelectorSpec> = Lazy::new(|| {
"($request.env.REMOTE_ADDR | $user.ip_address | $sdk.client_ip)"
.parse()
.unwrap()
});

pub fn to_pii_config(
datascrubbing_config: &DataScrubbingConfig,
) -> Result<Option<PiiConfig>, PiiConfigError> {
Expand All @@ -39,7 +38,11 @@ pub fn to_pii_config(
}

if datascrubbing_config.scrub_ip_addresses {
// legacy(?) scrubs all fields that are known to have IPs regardless of actual content
applications.insert(KNOWN_IP_FIELDS.clone(), vec!["@anything:remove".to_owned()]);

// checks actual contents of all fields and scrubs where there is an IP address
applied_rules.push("@ip:replace".to_owned());
}

if datascrubbing_config.scrub_data {
Expand Down Expand Up @@ -217,7 +220,8 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
},
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value)": [
"@common:filter"
"@common:filter",
"@ip:replace"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
"@anything:remove"
Expand All @@ -242,7 +246,8 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
},
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value)": [
"@common:filter"
"@common:filter",
"@ip:replace"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
"@anything:remove"
Expand Down Expand Up @@ -277,6 +282,7 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value)": [
"@common:filter",
"@ip:replace",
"strip-fields"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
Expand Down Expand Up @@ -315,7 +321,8 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
},
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value) && !foobar": [
"@common:filter"
"@common:filter",
"@ip:replace"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
"@anything:remove"
Expand All @@ -341,6 +348,9 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
"hashKey": null
},
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value)": [
"@ip:replace"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
"@anything:remove"
]
Expand Down Expand Up @@ -409,23 +419,6 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
assert_annotated_snapshot!(data);
}

#[test]
fn test_sdk_client_ip_stripped() {
let mut data = Event::from_value(
serde_json::json!({
"sdk": {
"client_ip": "127.0.0.1"
}
})
.into(),
);

let pii_config = simple_enabled_pii_config();
let mut pii_processor = PiiProcessor::new(pii_config.compiled());
process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap();
assert_annotated_snapshot!(data);
}

#[test]
fn test_user() {
let mut data = Event::from_value(
Expand All @@ -445,14 +438,30 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
assert_annotated_snapshot!(data);
}

/// Checks that any fields containing an IP-address is scrubbed.
/// Even if it doesn't make sense for there to be an IP-address
/// such as in the username field.
#[test]
fn test_user_ip_stripped() {
fn test_ip_stripped() {
let mut data = Event::from_value(
serde_json::json!({
"user": {
"username": "secret",
"ip_address": "73.133.27.120",
"username": "73.133.27.120", // should be stripped despite not being "known ip field"
"ip_address": "should be stripped despite lacking ip address",
"data": sensitive_vars()
},
"breadcrumbs": {
"values": [
{
"message": "73.133.27.120",
"data": {
"test_data": "73.133.27.120" // test deep wildcard stripping
}
},
],
},
"sdk": {
"client_ip": "should also be stripped"
}
})
.into(),
Expand Down Expand Up @@ -1231,6 +1240,7 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value)": [
"@common:filter",
"@ip:replace",
"strip-fields"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,18 @@ expression: data
"REMOTE_ADDR": {
"": {
"rem": [
[
"@ip:replace",
"s",
0,
4
],
[
"@anything:remove",
"x"
]
]
],
"len": 9
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
---
source: relay-general/src/pii/convert.rs
expression: data
---
{
"user": {
"ip_address": null,
"username": "[ip]",
"data": {
"a_password_here": "hello",
"apiKey": "secret_key",
"api_key": "secret_key",
"foo": "bar",
"password": "hello",
"the_secret": "hello"
}
},
"breadcrumbs": {
"values": [
{
"message": "[ip]",
"data": {
"test_data": "[ip]"
}
}
]
},
"sdk": {
"client_ip": null
},
"_meta": {
"breadcrumbs": {
"values": {
"0": {
"data": {
"test_data": {
"": {
"rem": [
[
"@ip:replace",
"s",
0,
4
]
],
"len": 13
}
}
},
"message": {
"": {
"rem": [
[
"@ip:replace",
"s",
0,
4
]
],
"len": 13
}
}
}
}
},
"sdk": {
"client_ip": {
"": {
"err": [
[
"invalid_data",
{
"reason": "expected an ip address"
}
]
],
"val": "should also be stripped"
}
}
},
"user": {
"ip_address": {
"": {
"err": [
[
"invalid_data",
{
"reason": "expected an ip address"
}
]
],
"val": "should be stripped despite lacking ip address"
}
},
"username": {
"": {
"rem": [
[
"@ip:replace",
"s",
0,
4
]
],
"len": 13
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ expression: pii_config
},
"applications": {
"($string || $number || $array) && !(debug_meta.** || $frame.filename || $frame.abs_path || $logentry.formatted || $error.value) && !url && !message && !'http.request.url' && !'*url*' && !'*message*' && !'*http.request.url*'": [
"@common:filter"
"@common:filter",
"@ip:replace"
],
"$http.env.REMOTE_ADDR || $user.ip_address || $sdk.client_ip": [
"@anything:remove"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,21 @@ expression: data
---
{
"sdk": {
"client_ip": null
"client_ip": "[ip]"
},
"_meta": {
"sdk": {
"client_ip": {
"": {
"rem": [
[
"@anything:remove",
"x"
"@ip:replace",
"s",
0,
4
]
]
],
"len": 9
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,18 @@ expression: data
"ip_address": {
"": {
"rem": [
[
"@ip:replace",
"s",
0,
4
],
[
"@anything:remove",
"x"
]
]
],
"len": 13
}
},
"username": {
Expand Down

0 comments on commit 156c2ae

Please sign in to comment.