Skip to content

Commit 15859bf

Browse files
authored
Merge pull request #5055 from broadinstitute/trigger-delete-families
Trigger delete families
2 parents 60b1ae3 + f5135fb commit 15859bf

File tree

8 files changed

+79
-70
lines changed

8 files changed

+79
-70
lines changed

clickhouse_search/search.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -667,11 +667,6 @@ def delete_clickhouse_project(project, dataset_type, sample_type=None):
667667
return f'Deleted all {dataset_type} search data for project {project.name}'
668668

669669

670-
def delete_clickhouse_family(project, family_guid, dataset_type, sample_type=None):
671-
dataset_type = _clickhouse_dataset_type(dataset_type, sample_type)
672-
return f'Clickhouse does not support deleting individual families from project. Manually delete {dataset_type} data for {family_guid} in project {project.guid}'
673-
674-
675670
SV_DATASET_TYPES = {
676671
Sample.SAMPLE_TYPE_WGS: Sample.DATASET_TYPE_SV_CALLS,
677672
Sample.SAMPLE_TYPE_WES: 'GCNV',

seqr/management/commands/transfer_families_to_different_project.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,13 @@
11
from django.core.management.base import BaseCommand
22

3-
from clickhouse_search.search import delete_clickhouse_family
4-
from seqr.models import Project, Family, VariantTag, VariantTagType, Sample
3+
from seqr.models import Project, Family, VariantTag, VariantTagType
54
from seqr.utils.search.utils import backend_specific_call
5+
from seqr.utils.search.add_data_utils import trigger_delete_families_search
66

77
import logging
88
logger = logging.getLogger(__name__)
99

1010

11-
def _disable_search(families, from_project):
12-
search_samples = Sample.objects.filter(is_active=True, individual__family__in=families)
13-
if search_samples:
14-
updated_families = search_samples.values_list("individual__family__family_id", flat=True).distinct()
15-
updated_family_dataset_types = list(search_samples.values_list('individual__family__guid', 'dataset_type', 'sample_type').distinct())
16-
family_summary = ", ".join(sorted(updated_families))
17-
num_updated = search_samples.update(is_active=False)
18-
logger.info(
19-
f'Disabled search for {num_updated} samples in the following {len(updated_families)} families: {family_summary}'
20-
)
21-
for update in updated_family_dataset_types:
22-
logger.info(delete_clickhouse_family(from_project, *update))
23-
24-
2511
class Command(BaseCommand):
2612
def add_arguments(self, parser):
2713
parser.add_argument('--from-project', required=True)
@@ -49,7 +35,7 @@ def handle(self, *args, **options):
4935
]
5036
logger.info(f'Skipping {num_found - len(families)} families with analysis groups in the project: {", ".join(group_families)}')
5137

52-
backend_specific_call(lambda *args: None, _disable_search)(families, from_project)
38+
backend_specific_call(lambda *args: None, trigger_delete_families_search)(from_project, list(families.values_list('guid', flat=True)))
5339

5440
for variant_tag_type in VariantTagType.objects.filter(project=from_project):
5541
variant_tags = VariantTag.objects.filter(saved_variants__family__in=families, variant_tag_type=variant_tag_type)

seqr/management/tests/transfer_families_to_different_project_tests.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from django.core.management import call_command
2+
import responses
23

34
from seqr.models import Family, VariantTagType, VariantTag, Sample
45
from seqr.views.utils.test_utils import AuthenticationTestCase, AnvilAuthenticationTestCase
@@ -9,7 +10,10 @@ class TransferFamiliesTest(object):
910
DEACTIVATE_SEARCH = True
1011
LOGS = []
1112

13+
@responses.activate
1214
def test_command(self):
15+
responses.add(responses.POST, 'http://pipeline-runner:6000/delete_families_enqueue', status=200)
16+
1317
call_command(
1418
'transfer_families_to_different_project', '--from-project=R0001_1kg', '--to-project=R0003_test', '2', '4', '5', '12',
1519
)
@@ -59,7 +63,5 @@ class TransferFamiliesClickhouseTest(TransferFamiliesTest, AnvilAuthenticationTe
5963
ES_HOSTNAME = ''
6064
LOGS = [
6165
('Disabled search for 7 samples in the following 1 families: 2', None),
62-
('Clickhouse does not support deleting individual families from project. Manually delete MITO data for F000002_2 in project R0001_1kg', None),
63-
('Clickhouse does not support deleting individual families from project. Manually delete SNV_INDEL data for F000002_2 in project R0001_1kg', None),
64-
('Clickhouse does not support deleting individual families from project. Manually delete GCNV data for F000002_2 in project R0001_1kg', None),
66+
('Triggered Delete Families', {'detail': {'project_guid': 'R0001_1kg', 'family_guids': ['F000002_2', 'F000004_4']}}),
6567
]

seqr/utils/search/add_data_utils.py

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from seqr.utils.middleware import ErrorsWarningsException
1414
from seqr.views.utils.airtable_utils import AirtableSession, ANVIL_REQUEST_TRACKING_TABLE
1515
from seqr.views.utils.export_utils import write_multiple_files
16+
from seqr.views.utils.json_utils import _to_title_case
1617
from seqr.views.utils.pedigree_info_utils import JsonConstants
1718
from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, BASE_URL, ANVIL_UI_URL, PIPELINE_RUNNER_SERVER, \
1819
SEQR_SLACK_ANVIL_DATA_LOADING_CHANNEL, SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL, LOADING_DATASETS_DIR
@@ -65,6 +66,22 @@ def update_airtable_loading_tracking_status(project, status, additional_update=N
6566
update={'Status': status, **(additional_update or {})},
6667
)
6768

69+
def trigger_delete_families_search(project, family_guids, user=None):
70+
search_samples = Sample.objects.filter(is_active=True, individual__family__guid__in=family_guids)
71+
info = []
72+
if search_samples:
73+
updated_families = search_samples.values_list("individual__family__family_id", flat=True).distinct()
74+
family_summary = ", ".join(sorted(updated_families))
75+
num_updated = search_samples.update(is_active=False)
76+
message = f'Disabled search for {num_updated} samples in the following {len(updated_families)} families: {family_summary}'
77+
info.append(message)
78+
logger.info(message, user)
79+
80+
variables = {'project_guid': project.guid, 'family_guids': family_guids}
81+
_enqueue_pipeline_request('delete_families', variables, user)
82+
info.append('Triggered delete family data')
83+
return info
84+
6885
def trigger_data_loading(projects: list[Project], individual_ids: list[int], sample_type: str, dataset_type: str,
6986
genome_version: str, data_path: str, user: User, raise_error: bool = False, skip_expect_tdr_metrics: bool = True,
7087
skip_check_sex_and_relatedness: bool = True, vcf_sample_id_map=None,
@@ -85,34 +102,40 @@ def trigger_data_loading(projects: list[Project], individual_ids: list[int], sam
85102
_upload_data_loading_files(individual_ids, vcf_sample_id_map or {}, user, file_path, raise_error)
86103
_write_gene_id_file(user)
87104

88-
response = requests.post(f'{PIPELINE_RUNNER_SERVER}/loading_pipeline_enqueue', json=variables, timeout=60)
89-
success = True
105+
error = _enqueue_pipeline_request('loading_pipeline', variables, user, raise_error)
106+
if error:
107+
safe_post_to_slack(
108+
SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL,
109+
f'{error_message}: {error}\nLoading pipeline should be triggered with:\n```{json.dumps(variables, indent=4)}```',
110+
)
111+
112+
success = not error
113+
if success_message and (success or success_slack_channel != SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL):
114+
safe_post_to_slack(success_slack_channel, '\n\n'.join([
115+
success_message,
116+
f'Pedigree files have been uploaded to {file_path}',
117+
f'Loading pipeline is triggered with:\n```{json.dumps(variables, indent=4)}```',
118+
]))
119+
120+
return success
121+
122+
123+
def _enqueue_pipeline_request(name: str, variables: dict, user: User, raise_error: bool = True):
124+
response = requests.post(f'{PIPELINE_RUNNER_SERVER}/{name}_enqueue', json=variables, timeout=60)
125+
error = None
90126
try:
91127
response.raise_for_status()
92-
logger.info('Triggered loading pipeline', user, detail=variables)
128+
logger.info(f'Triggered {_to_title_case(name)}', user, detail=variables)
93129
except requests.HTTPError as e:
94-
success = False
95130
error = str(e)
96131
if response.status_code == 409:
97132
error = 'Loading pipeline is already running. Wait for it to complete and resubmit'
98133
e = ErrorsWarningsException([error])
99134
if raise_error:
100135
raise e
101136
else:
102-
logger.warning(f'Error triggering loading pipeline: {error}', user, detail=variables)
103-
safe_post_to_slack(
104-
SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL,
105-
f'{error_message}: {error}\nLoading pipeline should be triggered with:\n```{json.dumps(variables, indent=4)}```',
106-
)
107-
108-
if success_message and (success or success_slack_channel != SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL):
109-
safe_post_to_slack(success_slack_channel, '\n\n'.join([
110-
success_message,
111-
f'Pedigree files have been uploaded to {file_path}',
112-
f'Loading pipeline is triggered with:\n```{json.dumps(variables, indent=4)}```',
113-
]))
114-
115-
return success
137+
logger.warning(f'Error Triggering {_to_title_case(name)}: {error}', user, detail=variables)
138+
return error
116139

117140

118141
def _loading_dataset_type(sample_type: str, dataset_type: str):

seqr/views/apis/anvil_workspace_api_tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -910,7 +910,7 @@ def _test_mv_file_and_triggering_loading_exception(self, url, workspace, sample_
910910
self.manager_user, detail={f'{project.guid}_pedigree': sample_data})
911911
self.mock_api_logger.error.assert_not_called()
912912
self.mock_add_data_utils_logger.warning.assert_called_with(
913-
'Error triggering loading pipeline: Loading pipeline is already running. Wait for it to complete and resubmit',
913+
'Error Triggering Loading Pipeline: Loading pipeline is already running. Wait for it to complete and resubmit',
914914
self.manager_user, detail=variables,
915915
)
916916
self.mock_airtable_logger.error.assert_called_with(

seqr/views/apis/data_manager_api.py

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
from django.views.decorators.csrf import csrf_exempt
1414
from requests.exceptions import ConnectionError as RequestConnectionError
1515

16-
from clickhouse_search.search import delete_clickhouse_project, delete_clickhouse_family
16+
from clickhouse_search.search import delete_clickhouse_project
1717
from seqr.utils.communication_utils import send_project_notification
18-
from seqr.utils.search.add_data_utils import trigger_data_loading, get_loading_samples_validator
18+
from seqr.utils.search.add_data_utils import trigger_data_loading, get_loading_samples_validator, trigger_delete_families_search
1919
from seqr.utils.search.elasticsearch.es_utils import get_elasticsearch_status, delete_es_index
2020
from seqr.utils.search.utils import clickhouse_only, es_only, InvalidSearchException
2121
from seqr.utils.file_utils import file_iter, does_file_exist
@@ -424,9 +424,17 @@ def fetch_missing_vcf_samples(missing_vcf_samples):
424424
def trigger_delete_project(request):
425425
request_json = json.loads(request.body)
426426
project_guid = request_json.pop('project')
427+
dataset_type = request_json.get('datasetType')
427428
project = Project.objects.get(guid=project_guid)
428-
samples = Sample.objects.filter(individual__family__project=project)
429-
return _trigger_data_update(delete_clickhouse_project, request.user, samples, request_json, project)
429+
samples = Sample.objects.filter(individual__family__project=project, dataset_type=dataset_type, is_active=True)
430+
sample_types = list(
431+
samples.values_list('sample_type', flat=True).distinct()
432+
) if dataset_type == Sample.DATASET_TYPE_SV_CALLS else [None]
433+
updated = Sample.bulk_update(user=request.user, update_json={'is_active': False}, queryset=samples)
434+
info = [f'Deactivated search for {len(updated)} individuals']
435+
for sample_type in sample_types:
436+
info.append(delete_clickhouse_project(project, dataset_type=dataset_type, sample_type=sample_type))
437+
return create_json_response({'info': info})
430438

431439

432440
@data_manager_required
@@ -436,19 +444,7 @@ def trigger_delete_family(request):
436444
family_guid = request_json.pop('family')
437445
project = Project.objects.get(family__guid=family_guid)
438446
samples = Sample.objects.filter(individual__family__guid=family_guid)
439-
return _trigger_data_update(delete_clickhouse_family, request.user, samples, request_json, project, family_guid)
440-
441-
442-
def _trigger_data_update(clickhouse_func, user, samples, request_json, project, *args):
443-
dataset_type = request_json.get('datasetType')
444-
samples = samples.filter(dataset_type=dataset_type, is_active=True)
445-
sample_types = list(
446-
samples.values_list('sample_type', flat=True).distinct()
447-
) if dataset_type == Sample.DATASET_TYPE_SV_CALLS else [None]
448-
updated = Sample.bulk_update(user=user, update_json={'is_active': False}, queryset=samples)
449-
info = [f'Deactivated search for {len(updated)} individuals']
450-
for sample_type in sample_types:
451-
info.append(clickhouse_func(project, *args, dataset_type=dataset_type, sample_type=sample_type))
447+
info = trigger_delete_families_search(project, [family_guid], request.user)
452448
return create_json_response({'info': info})
453449

454450

seqr/views/apis/data_manager_api_tests.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,8 @@
458458
'ABC123', 'NA19675_1', 'NA19678', 'NA19679', 'HG00731', 'HG00732', 'HG00733', 'NA20874', 'NA21234', 'NA21987',
459459
]
460460

461-
PIPELINE_RUNNER_URL = 'http://pipeline-runner:6000/loading_pipeline_enqueue'
461+
PIPELINE_RUNNER_HOST = 'http://pipeline-runner:6000'
462+
PIPELINE_RUNNER_URL = f'{PIPELINE_RUNNER_HOST}/loading_pipeline_enqueue'
462463

463464

464465
@mock.patch('seqr.views.apis.data_manager_api.LOADING_DATASETS_DIR', '/local_datasets')
@@ -1593,11 +1594,13 @@ def test_trigger_delete_project(self):
15931594

15941595
@responses.activate
15951596
def test_trigger_delete_family(self):
1597+
responses.add(responses.POST, f'{PIPELINE_RUNNER_HOST}/delete_families_enqueue', status=200)
1598+
15961599
url = reverse(trigger_delete_family)
15971600
self.check_data_manager_login(url)
15981601

15991602
Project.objects.filter(guid=PROJECT_GUID).update(genome_version='38')
1600-
response = self.client.post(url, content_type='application/json', data=json.dumps({'family': 'F000002_2', 'datasetType': 'SV'}))
1603+
response = self.client.post(url, content_type='application/json', data=json.dumps({'family': 'F000002_2'}))
16011604
self._assert_expected_delete_family(response)
16021605

16031606
def _assert_expected_delete_project(self, response):
@@ -1694,7 +1697,7 @@ def _has_expected_ped_files(self, mock_open, mock_gzip_open, mock_mkdir, dataset
16941697

16951698
def _assert_success_notification(self, variables):
16961699
self.maxDiff = None
1697-
self.assert_json_logs(self.pm_user, [('Triggered loading pipeline', {'detail': variables})])
1700+
self.assert_json_logs(self.pm_user, [('Triggered Loading Pipeline', {'detail': variables})])
16981701

16991702
def _trigger_error(self, url, body, variables, mock_open, mock_gzip_open, mock_mkdir):
17001703
super()._trigger_error(url, body, variables, mock_open, mock_gzip_open, mock_mkdir)
@@ -1915,7 +1918,7 @@ def _assert_trigger_error(self, response, body, variables, response_body):
19151918
'callset_path': variables['callset_path'].replace('callset.vcf', 'sv_callset.vcf'),
19161919
}
19171920
self.assert_json_logs(self.data_manager_user, [
1918-
(f'Error triggering loading pipeline: {error}', {'severity': 'WARNING', 'detail': variables}),
1921+
(f'Error Triggering Loading Pipeline: {error}', {'severity': 'WARNING', 'detail': variables}),
19191922
], offset=6)
19201923

19211924
error_message = f"""ERROR triggering internal WES SV loading: {error}
@@ -2082,14 +2085,19 @@ def _assert_expected_delete_family(self, response):
20822085
self.assertEqual(response.status_code, 200)
20832086
self.assertDictEqual(response.json(), {
20842087
'info': [
2085-
'Deactivated search for 3 individuals',
2086-
'Clickhouse does not support deleting individual families from project. Manually delete GCNV data for F000002_2 in project R0001_1kg',
2088+
'Disabled search for 7 samples in the following 1 families: 2',
2089+
'Triggered delete family data',
20872090
],
20882091
})
20892092

20902093
family_samples = Sample.objects.filter(individual__family_id=2, is_active=True)
2091-
self.assertEqual(family_samples.filter(dataset_type='SV').count(), 0)
2092-
self.assertEqual(family_samples.count(),4)
2094+
self.assertEqual(family_samples.count(),0)
2095+
2096+
self.assertEqual(len(responses.calls), 1)
2097+
self.assertDictEqual(json.loads(responses.calls[-1].request.body), {
2098+
'project_guid': 'R0001_1kg',
2099+
'family_guids': ['F000002_2'],
2100+
})
20932101

20942102
def _assert_expected_airtable_errors(self, url):
20952103
responses.replace(

ui/pages/DataManagement/components/TriggerSearchDataUpdatePages.jsx

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ const FAMILY_FIELDS = [
4343
placeholder: 'Search for a family',
4444
validate: validators.required,
4545
},
46-
DATASET_TYPE_FIELD,
4746
]
4847

4948
const TriggerSearchDataUpdateForm = ({ entity, fields }) => (

0 commit comments

Comments (0)