Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion migrations_lockfile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ feedback: 0004_index_together
hybridcloud: 0016_add_control_cacheversion
nodestore: 0002_nodestore_no_dictfield
replays: 0004_index_together
sentry: 0725_create_sentry_groupsearchview_table
sentry: 0726_apitoken_backfill_hashes
social_auth: 0002_default_auto_field
154 changes: 154 additions & 0 deletions src/sentry/migrations/0726_apitoken_backfill_hashes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Generated by Django 5.0.6 on 2024-05-29 21:28

import hashlib
import logging
from enum import IntEnum

from django.db import migrations, router
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps

from sentry.new_migrations.migrations import CheckedMigration
from sentry.utils.query import RangeQuerySetWrapperWithProgressBar

# Module-level logger; the migration uses it to report when its required imports are unavailable.
logger = logging.getLogger(__name__)


def backfill_hash_values(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """Backfill the SHA-256 hash columns on every ``ApiToken`` row.

    For each token, ``hashed_token`` is filled in when it is NULL, and
    ``hashed_refresh_token`` is brought in line with the SHA-256 of
    ``refresh_token`` whenever a refresh token exists. Rows that needed a
    change are saved, and one ``ControlOutbox`` row is created per region
    returned by ``find_regions_for_user`` for the token's user.

    Rows whose stored hashes are already up to date are skipped, so re-running
    the migration neither rewrites those tokens nor creates duplicate outbox
    rows for them.

    Args:
        apps: Historical app registry used to look up the models.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    ApiToken = apps.get_model("sentry", "ApiToken")
    ControlOutbox = apps.get_model("sentry", "ControlOutbox")
    OrganizationMemberMapping = apps.get_model("sentry", "OrganizationMemberMapping")
    OrganizationMapping = apps.get_model("sentry", "OrganizationMapping")

    # If any of these symbols cannot be imported, the failure is logged and the
    # migration does nothing.
    try:
        from collections.abc import Container

        from django.conf import settings

        from sentry.services.hybrid_cloud.util import control_silo_function
        from sentry.silo.base import SiloMode
        from sentry.silo.safety import unguarded_write
    except ImportError:
        logger.exception("Cannot execute migration. Required symbols could not be imported")
        return

    # copied from src/sentry/models/outbox.py
    class OutboxCategory(IntEnum):
        USER_UPDATE = 0
        UNUSED_TWO = 4
        UNUSUED_THREE = 13
        UNUSED_ONE = 19
        AUTH_IDENTITY_UPDATE = 25
        API_TOKEN_UPDATE = 32

    # copied from src/sentry/models/outbox.py
    _outbox_categories_for_scope: dict[int, set[OutboxCategory]] = {}
    _used_categories: set[OutboxCategory] = set()

    # copied from src/sentry/models/outbox.py
    def scope_categories(enum_value: int, categories: set[OutboxCategory]) -> int:
        """Register ``categories`` under ``enum_value`` and return ``enum_value``.

        Asserts that none of the categories was registered by an earlier call.
        """
        _outbox_categories_for_scope[enum_value] = categories
        inter = _used_categories.intersection(categories)
        assert not inter, f"OutboxCategories {inter} were already registered to a different scope"
        _used_categories.update(categories)
        return enum_value

    # copied from src/sentry/models/outbox.py
    class OutboxScope(IntEnum):
        USER_SCOPE = scope_categories(
            1,
            {
                OutboxCategory.USER_UPDATE,
                OutboxCategory.API_TOKEN_UPDATE,
                OutboxCategory.UNUSED_ONE,
                OutboxCategory.UNUSED_TWO,
                OutboxCategory.UNUSUED_THREE,
                OutboxCategory.AUTH_IDENTITY_UPDATE,
            },
        )

    @control_silo_function
    def _find_orgs_for_user(user_id: int) -> set[int]:
        """Return the organization ids that have a member mapping for ``user_id``."""
        return {
            m["organization_id"]
            for m in OrganizationMemberMapping.objects.filter(user_id=user_id).values(
                "organization_id"
            )
        }

    @control_silo_function
    def find_regions_for_orgs(org_ids: Container[int]) -> set[str]:
        """Return the region names mapped to ``org_ids``.

        In monolith mode the single configured monolith region is returned.
        """
        if SiloMode.get_current_mode() == SiloMode.MONOLITH:
            return {settings.SENTRY_MONOLITH_REGION}
        else:
            return set(
                OrganizationMapping.objects.filter(organization_id__in=org_ids).values_list(
                    "region_name", flat=True
                )
            )

    @control_silo_function
    def find_regions_for_user(user_id: int) -> set[str]:
        """Return the region names of every organization ``user_id`` is mapped to.

        In monolith mode the single configured monolith region is returned.
        """
        if SiloMode.get_current_mode() == SiloMode.MONOLITH:
            return {settings.SENTRY_MONOLITH_REGION}

        org_ids = _find_orgs_for_user(user_id)
        return find_regions_for_orgs(org_ids)

    def _sha256_hex(value: str) -> str:
        """Return the hex SHA-256 digest of ``value``."""
        return hashlib.sha256(value.encode()).hexdigest()

    for api_token in RangeQuerySetWrapperWithProgressBar(ApiToken.objects.all()):
        needs_save = False

        if api_token.hashed_token is None:
            api_token.hashed_token = _sha256_hex(api_token.token)
            needs_save = True

        # if there's a refresh token make sure it is hashed as well; a row whose
        # stored hash already matches is not treated as changed, so it is not
        # re-saved and no redundant outbox rows are created for it
        if api_token.refresh_token:
            expected_refresh_hash = _sha256_hex(api_token.refresh_token)
            if api_token.hashed_refresh_token != expected_refresh_hash:
                api_token.hashed_refresh_token = expected_refresh_hash
                needs_save = True

        # only save if we've actually had to hash values
        if not needs_save:
            continue

        with unguarded_write(using=router.db_for_write(ApiToken)):
            api_token.save(update_fields=["hashed_token", "hashed_refresh_token"])
            user_regions = find_regions_for_user(api_token.user_id)
            for region in user_regions:
                ControlOutbox.objects.create(
                    shard_scope=OutboxScope.USER_SCOPE,
                    shard_identifier=api_token.user_id,
                    category=OutboxCategory.API_TOKEN_UPDATE,
                    region_name=region,
                    object_identifier=api_token.id,
                )


class Migration(CheckedMigration):
    # Post-deployment migrations are not run automatically in production. The flag is
    # only appropriate when it is safe to apply the migration after the code has shipped,
    # which rules out most operations that alter a table's schema. Typical candidates:
    # - Large data migrations, which are run manually so they can be monitored and do not
    #   hold up a deploy while they execute.
    # - Index creation on large tables, which is slow but safe to apply after the code has
    #   deployed even though it is technically a schema change.
    # To run one manually once deployed, see:
    # https://develop.sentry.dev/database-migrations/#migration-deployment

    is_post_deployment = True

    dependencies = [("sentry", "0725_create_sentry_groupsearchview_table")]

    operations = [
        # Forward: backfill the token hashes. Reverse: nothing to undo.
        migrations.RunPython(
            code=backfill_hash_values,
            reverse_code=migrations.RunPython.noop,
            hints={"tables": ["sentry_apitoken"]},
        ),
    ]
53 changes: 53 additions & 0 deletions tests/sentry/migrations/test_0726_apitoken_backfill_hashes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from sentry.models.outbox import ControlOutbox, OutboxCategory, OutboxScope
from sentry.testutils.cases import TestMigrations
from sentry.testutils.helpers import override_options
from sentry.testutils.silo import control_silo_test


@control_silo_test
class TestBackfillApiTokenHashesMigration(TestMigrations):
    """Checks that migration 0726 hashes existing API tokens and creates outbox rows."""

    migrate_from = "0725_create_sentry_groupsearchview_table"
    migrate_to = "0726_apitoken_backfill_hashes"
    connection = "control"

    @override_options({"apitoken.save-hash-on-create": False})
    def setup_initial_state(self):
        """Create one user auth token and one sentry-app token, both without hashes."""
        owner = self.create_user()
        self.user_auth_token = self.create_user_auth_token(user=owner)

        # The user needs an organization so that a membership exists.
        org = self.create_organization(owner=owner)

        sentry_app = self.create_sentry_app(user=owner, organization_id=org.id)
        self.app_install = self.create_sentry_app_installation(
            organization=org, user=owner, slug=sentry_app.slug
        )

        # A user auth token starts unhashed and carries no refresh token.
        assert self.user_auth_token.hashed_token is None
        assert self.user_auth_token.refresh_token is None

        # A sentry-app token starts unhashed too, but it does carry a refresh token.
        install_token = self.app_install.api_token
        assert install_token.hashed_token is None
        assert install_token.hashed_refresh_token is None
        assert install_token.refresh_token is not None

    def _assert_outbox_exists(self, token):
        # .get() raises unless exactly one matching outbox row exists.
        lookup = {
            "shard_scope": OutboxScope.USER_SCOPE,
            "category": OutboxCategory.API_TOKEN_UPDATE,
            "object_identifier": token.id,
            "shard_identifier": token.user_id,
        }
        assert ControlOutbox.objects.get(**lookup)

    def test_for_hashed_value(self):
        """Both tokens end up hashed and each has a matching outbox row."""
        self.user_auth_token.refresh_from_db()
        assert self.user_auth_token.hashed_token is not None
        self._assert_outbox_exists(self.user_auth_token)

        self.app_install.refresh_from_db()
        install_token = self.app_install.api_token
        assert install_token.hashed_token is not None
        assert install_token.hashed_refresh_token is not None
        self._assert_outbox_exists(install_token)