Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions src/sentry/integrations/services/repository/impl.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from __future__ import annotations

from datetime import timedelta
from typing import Any

from django.db import IntegrityError, router, transaction
from django.utils import timezone

from sentry.api.serializers import serialize
from sentry.constants import ObjectStatus
Expand All @@ -12,7 +14,10 @@
from sentry.integrations.services.repository import RepositoryService, RpcRepository
from sentry.integrations.services.repository.model import RpcCreateRepository
from sentry.integrations.services.repository.serial import serialize_repository
from sentry.models.code_review_event import CodeReviewEvent
from sentry.models.commit import Commit
from sentry.models.projectcodeowners import ProjectCodeOwners
from sentry.models.pullrequest import PullRequest
from sentry.models.repository import Repository
from sentry.tasks.seer.cleanup import (
bulk_cleanup_seer_repository_preferences,
Expand Down Expand Up @@ -165,6 +170,65 @@ def disable_repositories_for_integration(
using=router.db_for_write(Repository),
)

def find_recently_active_repo_external_ids(
    self,
    *,
    organization_id: int,
    integration_id: int,
    provider: str,
    external_ids: list[str],
    cutoff_days: int,
) -> list[str]:
    """Return the subset of ``external_ids`` whose repos show recent activity.

    A repository counts as "recently active" when it has at least one
    commit, pull request, or code review event newer than ``cutoff_days``
    days ago. Only active repositories scoped to the given
    org/integration/provider are considered.
    """
    if not external_ids:
        return []

    activity_cutoff = timezone.now() - timedelta(days=cutoff_days)

    # Map internal repo ids to the caller-facing external ids in one query,
    # restricted to active repos under this org/integration/provider.
    external_by_repo_id = dict(
        Repository.objects.filter(
            organization_id=organization_id,
            integration_id=integration_id,
            provider=provider,
            external_id__in=external_ids,
            status=ObjectStatus.ACTIVE,
        ).values_list("id", "external_id")
    )
    if not external_by_repo_id:
        return []

    candidate_ids = list(external_by_repo_id)

    # Three activity sources; any single hit marks the repo as active.
    commit_hits = (
        Commit.objects.filter(
            repository_id__in=candidate_ids,
            date_added__gte=activity_cutoff,
        )
        .values_list("repository_id", flat=True)
        .distinct()
    )
    pr_hits = (
        PullRequest.objects.filter(
            repository_id__in=candidate_ids,
            date_added__gte=activity_cutoff,
        )
        .values_list("repository_id", flat=True)
        .distinct()
    )
    review_hits = (
        CodeReviewEvent.objects.filter(
            organization_id=organization_id,
            repository_id__in=candidate_ids,
            trigger_at__gte=activity_cutoff,
        )
        .values_list("repository_id", flat=True)
        .distinct()
    )

    recently_active: set[int] = set()
    for hits in (commit_hits, pr_hits, review_hits):
        recently_active.update(hits)

    return [
        external_id
        for repo_id, external_id in external_by_repo_id.items()
        if repo_id in recently_active and external_id is not None
    ]

def disable_repositories_by_external_ids(
self,
*,
Expand Down
17 changes: 17 additions & 0 deletions src/sentry/integrations/services/repository/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,23 @@ def disable_repositories_by_external_ids(
Only active repositories are affected. Code mappings and commits are preserved.
"""

@cell_rpc_method(resolve=ByOrganizationId())
@abstractmethod
def find_recently_active_repo_external_ids(
    self,
    *,
    organization_id: int,
    integration_id: int,
    provider: str,
    external_ids: list[str],
    cutoff_days: int,
) -> list[str]:
    """
    Of the given ``external_ids`` (scoped to the org/integration/provider),
    return the subset whose underlying ``Repository`` rows have at least one
    commit, pull request, or code review event in the last ``cutoff_days``
    days. Repositories that are not in an active status are excluded.
    An empty input list yields an empty result.
    """

@cell_rpc_method(resolve=ByOrganizationId())
@abstractmethod
def disassociate_organization_integration(
Expand Down
48 changes: 43 additions & 5 deletions src/sentry/integrations/source_code_management/sync_repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@

SYNC_BATCH_SIZE = 100

# Final safety guard before auto-disabling a repo: if it has any commit, PR,
# or code review event row in the last DISABLE_ACTIVITY_CUTOFF_DAYS days, we
# skip the disable even if the provider isn't returning the repo right now.
DISABLE_ACTIVITY_CUTOFF_DAYS = 30


def bump_org_integration_last_sync(
organization_integration_id: int, *, repos_changed: bool = False
Expand Down Expand Up @@ -232,12 +237,46 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
return

removals_enabled = _has_feature("organizations:scm-repo-auto-sync-removal", rpc_org)

# Filter out any repo with recent activity (commits, PRs, code review
Comment thread
wedamija marked this conversation as resolved.
# events) before disable.
removed_id_list = list(removed_ids)
active_skipped: set[str] = set()
if removed_id_list:
active_skipped = set(
repository_service.find_recently_active_repo_external_ids(
organization_id=rpc_org.id,
integration_id=integration.id,
provider=provider,
external_ids=removed_id_list,
cutoff_days=DISABLE_ACTIVITY_CUTOFF_DAYS,
)
)
if active_skipped:
logger.info(
"scm.repo_sync.disable_skipped_due_to_activity",
extra={
"provider": provider_key,
"integration_id": integration.id,
"organization_id": rpc_org.id,
"candidate_count": len(removed_id_list),
"skipped_count": len(active_skipped),
"skipped_ids": list(active_skipped),
"cutoff_days": DISABLE_ACTIVITY_CUTOFF_DAYS,
},
)
metrics.distribution(
"scm.repo_sync.disable_skipped_due_to_activity",
len(active_skipped),
tags={"provider": provider_key},
sample_rate=1.0,
)

safe_to_disable = [eid for eid in removed_id_list if eid not in active_skipped]
bump_org_integration_last_sync(
organization_integration_id,
repos_changed=bool(new_ids or restored_ids or (removed_ids and removals_enabled)),
repos_changed=bool(new_ids or restored_ids or (safe_to_disable and removals_enabled)),
)

# Build repo configs for new repos
new_repo_configs = [
{
**repo,
Expand All @@ -248,7 +287,6 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
for repo in provider_repos
if repo["external_id"] in new_ids
]
removed_id_list = list(removed_ids)
restored_id_list = list(restored_ids)

for config_batch in chunked(new_repo_configs, SYNC_BATCH_SIZE):
Expand All @@ -260,7 +298,7 @@ def sync_repos_for_org(organization_integration_id: int) -> None:
)

if removals_enabled:
for removed_batch in chunked(removed_id_list, SYNC_BATCH_SIZE):
for removed_batch in chunked(safe_to_disable, SYNC_BATCH_SIZE):
disable_repos_batch.apply_async(
kwargs={
"organization_integration_id": organization_integration_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from sentry.integrations.models.organization_integration import OrganizationIntegration
from sentry.integrations.source_code_management.sync_repos import sync_repos_for_org
from sentry.models.auditlogentry import AuditLogEntry
from sentry.models.commit import Commit
from sentry.models.repository import Repository
from sentry.silo.base import SiloMode
from sentry.testutils.cases import IntegrationTestCase, TestCase
Expand Down Expand Up @@ -118,6 +119,48 @@ def test_disables_removed_repos(self, _: MagicMock, __: MagicMock) -> None:
event=audit_log.get_event_id("REPO_ADDED"),
).exists()

@patch(
    "sentry.tasks.seer.cleanup.make_bulk_remove_repositories_request",
    return_value=MagicMock(status=200),
)
@responses.activate
def test_skips_disable_for_repo_with_recent_activity(self, _: MagicMock, __: MagicMock) -> None:
    """A repo missing from the provider listing but with a recent commit
    must NOT be disabled: fresh activity suggests the listing is stale
    rather than the repo deleted. This is the final safety guard before
    an auto-disable.
    """
    with assume_test_silo_mode(SiloMode.CELL):
        repo = Repository.objects.create(
            organization_id=self.organization.id,
            name="getsentry/old-but-active-repo",
            external_id="99",
            provider="integrations:github",
            integration_id=self.integration.id,
            status=ObjectStatus.ACTIVE,
        )
        # Recent commit row — the activity signal that blocks the disable.
        Commit.objects.create(
            organization_id=self.organization.id,
            repository_id=repo.id,
            key="abc123",
        )

    # The provider listing omits the active repo entirely.
    self._add_repos_response([{"id": 1, "full_name": "getsentry/sentry", "name": "sentry"}])

    enabled_flags = [
        "organizations:github-repo-auto-sync",
        "organizations:github-repo-auto-sync-apply",
        "organizations:scm-repo-auto-sync-removal",
    ]
    with self.feature(enabled_flags), self.tasks():
        sync_repos_for_org(self.oi.id)

    with assume_test_silo_mode(SiloMode.CELL):
        repo.refresh_from_db()
        assert repo.status == ObjectStatus.ACTIVE

@responses.activate
def test_re_enables_restored_repos(self, _: MagicMock) -> None:
with assume_test_silo_mode(SiloMode.CELL):
Expand Down
Loading