Skip to content

Commit

Permalink
fix(tags): Allow filtering by both IPv4 and IPv6 tags on EventUsers (#…
Browse files Browse the repository at this point in the history
…71474)

Resolves #71461

Prior, when filtering by IP, regardless of which type the address was,
we'd apply an IPv4 and IPv6 filter (`toIPv4` and `toIPv6`). For v4s this
works fine since the `toIPv4` resolves and `toIPv6` allows for passing
v4s. When passing in v6s though, `toIPv4` fails loudly and returns a 500
from the API.

Now, we'll just create two conditions (v4 and v6) and add in all the
valid IPs for each form of address after validating them as one or the
other. The code was also a bit confusing so it got a heavy refactor, and
a few test changes.
  • Loading branch information
leeandher committed May 24, 2024
1 parent e43b8ce commit d3a44e0
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 49 deletions.
77 changes: 42 additions & 35 deletions src/sentry/utils/eventuser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dataclasses import dataclass
from datetime import datetime
from functools import cached_property
from ipaddress import IPv4Address, IPv6Address, ip_address
from typing import Any

from snuba_sdk import (
Expand Down Expand Up @@ -35,14 +36,14 @@

REFERRER = "sentry.utils.eventuser"

SNUBA_KEYWORD_MAP = BidirectionalMapping(
{
("user_id"): "id",
("user_name"): "username",
("user_email"): "email",
("ip_address_v4", "ip_address_v6"): "ip",
}
)

SNUBA_KEYWORD_SET = {"id", "username", "email", "ip"}
# Keyword 'ip' is a special case since we need to handle IPv4/IPv6 differently
SNUBA_KEYWORD_COLUMN_MAP = {
"id": "user_id",
"username": "user_name",
"email": "user_email",
}

# The order of these keys are significant to also indicate priority
# when used in hashing and determining uniqueness. If you change the order
Expand All @@ -55,13 +56,36 @@
"ip_address": "ip",
}
)

SNUBA_COLUMN_COALASCE = {"ip_address_v4": "toIPv4", "ip_address_v6": "toIPv6"}
MAX_QUERY_TRIES = 5
OVERFETCH_FACTOR = 10
MAX_FETCH_SIZE = 10_000


def get_ip_address_conditions(ip_addresses: list[str]) -> list[Condition]:
"""
Returns a list of Snuba Conditions for filtering a list of mixed IPv4/IPv6 addresses.
Silently ignores invalid IP addresses, and applies `Op.IN` to the `ip_address_v4` and/or `ip_address_v6` columns.
"""
ipv4_addresses = []
ipv6_addresses = []
for ip in ip_addresses:
try:
valid_ip = ip_address(ip)
except ValueError:
continue
if type(valid_ip) is IPv4Address:
ipv4_addresses.append(Function("toIPv4", parameters=[ip]))
elif type(valid_ip) is IPv6Address:
ipv6_addresses.append(Function("toIPv6", parameters=[ip]))

conditions = []
if len(ipv4_addresses) > 0:
conditions.append(Condition(Column("ip_address_v4"), Op.IN, ipv4_addresses))
if len(ipv6_addresses) > 0:
conditions.append(Condition(Column("ip_address_v6"), Op.IN, ipv6_addresses))
return conditions


@dataclass
class EventUser:
project_id: int | None
Expand Down Expand Up @@ -114,32 +138,15 @@ def for_projects(
]

keyword_where_conditions = []
for keyword, value in keyword_filters.items():
if not isinstance(value, list):
for keyword, filter_list in keyword_filters.items():
if not isinstance(filter_list, list):
raise ValueError(f"{keyword} filter must be a list of values")

snuba_column = SNUBA_KEYWORD_MAP.get_key(keyword)
if isinstance(snuba_column, tuple):
for filter_value in value:
keyword_where_conditions.append(
BooleanCondition(
BooleanOp.OR,
[
Condition(
Column(column),
Op.IN,
value
if SNUBA_COLUMN_COALASCE.get(column, None) is None
else Function(
SNUBA_COLUMN_COALASCE.get(column), parameters=[filter_value]
),
)
for column in snuba_column
],
)
)
else:
keyword_where_conditions.append(Condition(Column(snuba_column), Op.IN, value))
if keyword not in SNUBA_KEYWORD_SET:
continue
if (snuba_column := SNUBA_KEYWORD_COLUMN_MAP.get(keyword)) is not None:
keyword_where_conditions.append(Condition(Column(snuba_column), Op.IN, filter_list))
elif keyword == "ip":
keyword_where_conditions.extend(get_ip_address_conditions(filter_list))

if len(keyword_where_conditions) > 1:
where_conditions.append(
Expand Down
61 changes: 47 additions & 14 deletions tests/sentry/utils/test_eventuser.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def test_for_projects_query_with_multiple_eventuser_entries_different_ips(self):
assert eusers[0].ip_address == self.event_2.data.get("user").get("ip_address")

def test_for_projects_query_with_multiple_eventuser_entries_different_ips_query_by_ip(self):
for i in range(10):
for i in range(5):
self.store_event(
data={
"user": {
Expand All @@ -186,19 +186,52 @@ def test_for_projects_query_with_multiple_eventuser_entries_different_ips_query_
},
project_id=self.project.id,
)
self.store_event(
data={
"user": {
"id": "gru",
"email": "gru@universal.com",
"username": "gru",
"ip_address": f"2001:0db8:0000:85a3:0000:0000:ac1f:800{i}",
},
"timestamp": iso_format(before_now(seconds=50 + i)),
},
project_id=self.project.id,
)
self.store_event(
data={
"user": {
"id": "scarlet",
"email": "scarlet@universal.com",
"username": "scarlet",
"ip_address": f"2001:db8:0:85a3::ac1f:{i}008",
},
"timestamp": iso_format(before_now(seconds=60 + i)),
},
project_id=self.project.id,
)

eusers = EventUser.for_projects(
[self.project],
{"ip": ["8.8.8.8", "1.1.1.4"]},
{
"ip": [
"2001:0db8:0000:85a3:0000:0000:ac1f:3008",
"2001:db8:0:85a3::ac1f:8001",
"8.8.8.4",
"1.1.1.2",
]
},
filter_boolean=BooleanOp.OR,
)
assert len(eusers) == 2
assert eusers[0].user_ident == self.event_3.data.get("user").get("id")
assert eusers[0].email == self.event_3.data.get("user").get("email")
assert eusers[0].ip_address == self.event_3.data.get("user").get("ip_address")
assert eusers[1].user_ident == self.event_2.data.get("user").get("id")
assert eusers[1].email == self.event_2.data.get("user").get("email")
assert eusers[1].ip_address == "1.1.1.4"
assert len(eusers) == 4
assert eusers[0].email == "nisanthan@sentry.io"
assert eusers[0].ip_address == "1.1.1.2"
assert eusers[1].email == "minion@universal.com"
assert eusers[1].ip_address == "8.8.8.4"
assert eusers[2].email == "gru@universal.com"
assert eusers[2].ip_address == "2001:db8:0:85a3::ac1f:8001"
assert eusers[3].email == "scarlet@universal.com"
assert eusers[3].ip_address == "2001:db8:0:85a3::ac1f:3008"

@mock.patch("sentry.analytics.record")
def test_for_projects_query_with_multiple_eventuser_entries_different_ips_query_by_username(
Expand All @@ -223,7 +256,7 @@ def test_for_projects_query_with_multiple_eventuser_entries_different_ips_query_
"id": "myminion",
"email": "minion@universal.com",
"username": "minion",
"ip_address": f"8.8.8.{i}",
"ip_address": f"2001:0db8:0000:85a3:0000:0000:ac1f:800{i}",
},
"timestamp": iso_format(before_now(seconds=40 + i)),
},
Expand Down Expand Up @@ -261,8 +294,8 @@ def test_for_projects_query_with_multiple_eventuser_entries_different_ips_query_
f"AND user_name IN array('nisanthan', 'minion')\n"
f"ORDER BY latest_timestamp DESC",
query_try=0,
count_rows_returned=20,
count_rows_filtered=18,
count_rows_returned=21,
count_rows_filtered=19,
query_time_ms=0,
),
call(
Expand Down Expand Up @@ -302,7 +335,7 @@ def test_for_projects_multiple_query(self, mock_record):
"id": "test2",
"email": email_2,
"username": "test2",
"ip_address": f"8.8.8.{i}",
"ip_address": f"2001:0db8:0000:85a3:0000:0000:ac1f:800{i}",
},
"timestamp": iso_format(before_now(minutes=60 + i)),
},
Expand All @@ -326,7 +359,7 @@ def test_for_projects_multiple_query(self, mock_record):
assert eusers[0].ip_address == "1.1.1.0"
assert eusers[1].user_ident == id_2
assert eusers[1].email == email_2
assert eusers[1].ip_address == "8.8.8.5"
assert eusers[1].ip_address == "2001:db8:0:85a3::ac1f:8005"

mock_record.assert_has_calls(
[
Expand Down

0 comments on commit d3a44e0

Please sign in to comment.